a
    hf                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dddZh d	Zee eee d
ddZG dd de
ZdS )    N)Path)IterableListTupleUnion)Dataset)download_url_to_fileZ@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4Z@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols>8   z)CLOSE-PARENz"CLOSE-QUOTEz'END-INNER-QUOTEz)RIGHT-PARENz(LEFT-PARENz{BRACEz(BEGIN-PARENSz-HYPHENz'INNER-QUOTEz(PARENTHESESz.PERIODz}RIGHT-BRACEz
#HASH-MARKz%PERCENTz:COLONz'QUOTEz{LEFT-BRACEz;SEMI-COLONz(PARENSz"END-OF-QUOTEz+PLUSz
"END-QUOTEz(PARENz)END-PARENTHESESz--DASHz)UN-PARENTHESESz.POINTz
)END-PARENz'SINGLE-QUOTEz)END-PARENSz;SEMI-COLON(1)z"DOUBLE-QUOTEz.DOTz!EXCLAMATION-POINTz
&AMPERSANDz(OPEN-PARENTHESESz-DASHz}CLOSE-BRACEz
'END-QUOTEz)CLOSE-PARENTHESESz...ELLIPSISz"QUOTEz)PARENz.DECIMALz)END-THE-PARENz{OPEN-BRACEz#SHARP-SIGNz
.FULL-STOPz#POUND-SIGNz/SLASHz)PARENSz(IN-PARENTHESESz"UNQUOTEz
"IN-QUOTESz?QUESTION-MARKz,COMMA)linesexclude_punctuationsreturnc                 C   s   t d}g }| D ]}|r|dr&q| d\}}|tv rn|rFq|drVd}n|drfd}n|d }t |d|}|d}|||f q|S )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)r   r   Z_alt_reZcmudictlinewordZphones r   I/var/www/auris/lib/python3.9/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryJ   s$    



r   c                	   @   s|   e Zd ZdZdddddeeef eeeeddd	d
Ze	e
eee f dddZe	dddZeee dddZdS )CMUDictaZ  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        exclude_punctuations (bool, optional):
            When enabled, exclude the pronounciation of punctuations, such as
            `!EXCLAMATION-POINT` and `#HASH-MARK`.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional):
            The URL to download the dictionary from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
        url_symbols (str, optional):
            The URL to download the list of symbols from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
    TFr	   r
   )downloadurlurl_symbolsN)rootr   r   r   r    r   c          
      C   sB  || _ t|| _tj| js,td| | jtj| }| jtj| }tj|s|sntd| t	
|d }t||| tj|s|std| t	
|d }t||| t|d$}	dd |	 D | _W d    n1 s0    Y  t|ddd$}	t|	 | j d	| _W d    n1 s40    Y  d S )
Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rc                 S   s   g | ]}|  qS r   )r   ).0r   r   r   r   
<listcomp>       z$CMUDict.__init__.<locals>.<listcomp>zlatin-1)encoding)r   )r   r   Z
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr   open	readlines_symbolsr   _dictionary)
selfr!   r   r   r   r    Z	dict_fileZsymbol_fileZchecksumtextr   r   r   __init__{   s8    

2zCMUDict.__init__)nr   c                 C   s
   | j | S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of a word and its phonemes

            str:
                Word
            List[str]:
                Phonemes
        )r2   )r3   r6   r   r   r   __getitem__   s    zCMUDict.__getitem__)r   c                 C   s
   t | jS )N)lenr2   r3   r   r   r   __len__   s    zCMUDict.__len__c                 C   s
   | j  S )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)r1   copyr9   r   r   r   symbols   s    zCMUDict.symbols)T)__name__
__module____qualname____doc__r   strr   boolr5   intr   r   r7   r:   propertyr<   r   r   r   r   r   i   s$    
)r   )r'   r   pathlibr   typingr   r   r   r   Ztorch.utils.datar   Ztorchaudio._internalr   r-   r   rA   rB   r   r   r   r   r   r   <module>   s   <