
    hf                         S SK r S SKrS SKJr  S SKJrJrJrJr  S SK	J
r
  S SKJr  SSS.r1 S	krS
\\   S\S\\   4S jr " S S\
5      rg)    N)Path)IterableListTupleUnion)Dataset)download_url_to_file@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols>8   .DOT+PLUS-DASH"QUOTE'QUOTE(PAREN)PAREN,COMMA--DASH.POINT/SLASH:COLON{BRACE(PARENS)PARENS-HYPHEN.PERIOD"UNQUOTE%PERCENT.DECIMAL
"END-QUOTE
"IN-QUOTES
#HASH-MARK
&AMPERSAND
'END-QUOTE
)END-PAREN
.FULL-STOP#POUND-SIGN#SHARP-SIGN(LEFT-PAREN)END-PARENS...ELLIPSIS;SEMI-COLON{LEFT-BRACE{OPEN-BRACE"CLOSE-QUOTE'INNER-QUOTE(PARENTHESES)CLOSE-PAREN)RIGHT-PAREN}CLOSE-BRACE}RIGHT-BRACE"DOUBLE-QUOTE"END-OF-QUOTE'SINGLE-QUOTE(BEGIN-PARENS)END-THE-PAREN;SEMI-COLON(1)?QUESTION-MARK(IN-PARENTHESES)UN-PARENTHESES'END-INNER-QUOTE)END-PARENTHESES(OPEN-PARENTHESES!EXCLAMATION-POINT)CLOSE-PARENTHESESlinesexclude_punctuationsreturnc                    [         R                  " S5      n/ nU  H  nU(       a  UR                  S5      (       a  M"  UR                  5       R	                  S5      u  pVU[
        ;   a@  U(       a  MV  UR                  S5      (       a  SnOUR                  S5      (       a  SnOUS   n[         R                  " USU5      nUR	                  S5      nUR                  XV45        M     U$ )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)rF   rG   _alt_recmudictlinewordphoness          S/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryrZ   J   s    jj'G+-Gtu--zz|))$/= # u%%&&Aw
 vvgr4(c"~&/ 2 N    c                       \ rS rSrSr SSSSS.S\\\4   S\S	\S
\S\SS4S jjjr	S\
S\\\\   4   4S jrS\
4S jr\S\\   4S j5       rSrg)CMUDicti   a"  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

Args:
    root (str or Path): Path to the directory where the dataset is found or downloaded.
    exclude_punctuations (bool, optional):
        When enabled, exclude the pronounciation of punctuations, such as
        `!EXCLAMATION-POINT` and `#HASH-MARK`.
    download (bool, optional):
        Whether to download the dataset if it is not found at root path. (default: ``False``).
    url (str, optional):
        The URL to download the dictionary from.
        (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
    url_symbols (str, optional):
        The URL to download the list of symbols from.
        (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
Fr   r   )downloadurlurl_symbolsrootrG   r_   r`   ra   rH   Nc                   X l         [        U5      U l        [        R                  R                  U R                  5      (       d  [        SU 35      eU R                  [        R                  R                  U5      -  nU R                  [        R                  R                  U5      -  n[        R                  R                  U5      (       d7  U(       d  [        SU 35      e[        R                  US 5      n[        XFU5        [        R                  R                  U5      (       d7  U(       d  [        SU 35      e[        R                  US 5      n[        XWU5        [        US5       n	U	R                  5        V
s/ s H  oR                  5       PM     sn
U l        S S S 5        [        USSS9 n	[!        U	R                  5       U R                   S9U l        S S S 5        g s  sn
f ! , (       d  f       NO= f! , (       d  f       g = f)Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rzlatin-1)encoding)rG   )rG   r   
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr	   open	readlinesrO   _symbolsrZ   _dictionary)selfrb   rG   r_   r`   ra   	dict_filesymbol_filechecksumtextrV   s              rY   __init__CMUDict.__init__{   s    %9!t*ww}}T__--!DTFKLLOObgg&6&6s&;;	oo(8(8(EEww~~i((";;D+G  "~~c40H :ww~~k**";;F-I  "~~k48H 8D+s#t6:nn6FG6FdZZ\6FGDM $ )S9501AX\XqXqrD 65 H $# 65s*   -G* G%G*4(G;%G**
G8;
H	nc                      U R                   U   $ )zLoad the n-th sample from the dataset.

Args:
    n (int): The index of the sample to be loaded.

Returns:
    Tuple of a word and its phonemes

    str:
        Word
    List[str]:
        Phonemes
)rr   )rs   rz   s     rY   __getitem__CMUDict.__getitem__   s     ""r[   c                 ,    [        U R                  5      $ )N)lenrr   rs   s    rY   __len__CMUDict.__len__   s    4##$$r[   c                 6    U R                   R                  5       $ )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)rq   copyr   s    rY   symbolsCMUDict.symbols   s     }}!!##r[   )rr   rf   rq   rG   )T)__name__
__module____qualname____firstlineno____doc__r   strr   boolrx   intr   r   r|   r   propertyr   __static_attributes__ r[   rY   r]   r]   i   s    ( &*'s
 Wg'sCI's #'s
 's 's 's 
'sR#S #U3S	>%: # % % $c $ $r[   r]   )rg   rL   pathlibr   typingr   r   r   r   torch.utils.datar   torchaudio._internalr	   rm   rQ   r   r   rZ   r]   r   r[   rY   <module>r      sh    	 	  / / $ 5 KM SU
9xXc] $ 4PS9 >Q$g Q$r[   