o
    vZh+                     @   s@   d dl mZmZmZ d dlZdd Zdd Zdd Zd	d
 ZdS )    )PreProcessorRegexPreProcessorSubsymbolsNc                 C   s   t tjdd dd| S )zAdd a space after tone-modifying punctuation.

    Because the `tone_marks` tokenizer case will split after a tone-modifying
    punctuation mark, make sure there's whitespace after.

    c                 S   
   d | S )Nz(?<={})formatx r
   L/var/www/auris/lib/python3.10/site-packages/gtts/tokenizer/pre_processors.py<lambda>      
 ztone_marks.<locals>.<lambda> search_argssearch_funcrepl)r   r   Z
TONE_MARKSruntextr
   r
   r   
tone_marks   s   r   c                 C   s   t ddd dd| S )zPRe-form words cut by end-of-line hyphens.

    Remove "<hyphen><newline>".

    -c                 S   r   )Nz{}
r   r   r
   r
   r   r      r   zend_of_line.<locals>.<lambda> r   )r   r   r   r
   r
   r   end_of_line   s
   
r   c                 C   s   t tjdd dtjd| S )a  Remove periods after an abbreviation from a list of known
    abbreviations that can be spoken the same without that period. This
    prevents having to handle tokenization of that period.

    Note:
        Could potentially remove the ending period of a sentence.

    Note:
        Abbreviations that Google Translate can't pronounce without
        (or even with) a period should be added as a word substitution with a
        :class:`PreProcessorSub` pre-processor. Ex.: 'Esq.', 'Esquire'.

    c                 S   r   )Nz(?<={})(?=\.).r   r   r
   r
   r   r   /   r   zabbreviations.<locals>.<lambda>r   )r   r   r   flags)r   r   ZABBREVIATIONSre
IGNORECASEr   r   r
   r
   r   abbreviations   s   r   c                 C   s   t tjd| S )zWord-for-word substitutions.)Z	sub_pairs)r   r   Z	SUB_PAIRSr   r   r
   r
   r   word_sub5   s   r   )	Zgtts.tokenizerr   r   r   r   r   r   r   r   r
   r
   r
   r   <module>   s   