
    h.                         S SK r S SKJr  S SKJrJrJr  S SKJr  S SK	J
r
  S SKJr  S SKJrJr  SrS	rS
r/ SQrSSSSSSSS.rS rS\S\S\S\S\S\\   S\\\\\\\4   4S jr " S S\
5      rg)    N)Path)ListTupleUnion)Tensor)Dataset)download_url_to_file)_extract_tar_load_waveformtrain-clean-100LibriSpeechi>  )z	dev-cleanz	dev-otherz
test-cleanz
test-otherr   ztrain-clean-360ztrain-other-500@76f87d090650617fca0cac8f88b9416e0ebf80350acb97b343a85fa903728ab3@12661c48e8c3fe1de2c1caa4c3e135193bfb1811584f11f569dd12645aa84365@39fde525e59672dc6d1551919b1478f724438a95aa55f874b576be21967e6c23@d09c181bba5cf717b3dee7d4d592af11a3ee3a09e08ae025c5506f6ebe961c29@d4ddd1d5a6ab303066f14971d768ee43278a5f2a0aa43dc716b0e64ecbbbf6e2@146a56496217e96c14334a160df97fffedd6e0a04e66b9c5af0d40be3c792ecf@ddb22f27f96ec163645d53215559df6aa36515f26e01dd70798188350adcb6d2)z4http://www.openslr.org/resources/12/dev-clean.tar.gzz4http://www.openslr.org/resources/12/dev-other.tar.gzz5http://www.openslr.org/resources/12/test-clean.tar.gzz5http://www.openslr.org/resources/12/test-other.tar.gzz:http://www.openslr.org/resources/12/train-clean-100.tar.gzz:http://www.openslr.org/resources/12/train-clean-360.tar.gzz:http://www.openslr.org/resources/12/train-other-500.tar.gzc                 .   SnSnX-   n[         R                  R                  X5      n[         R                  R                  X$5      n[         R                  R                  U5      (       d   [        R                  US 5      n[        XeUS9  [        U5        g )Nz$http://www.openslr.org/resources/12/z.tar.gz)hash_prefix)ospathjoinisfile
_CHECKSUMSgetr	   r
   )rooturlbase_urlext_archivefilenamearchivedownload_urlchecksums           _/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/datasets/librispeech_biasing.py_download_librispeechr&   !   sm    5HK Hggll4*G77<<3L77>>'"">>,5\I    fileidr   folder	ext_audioext_txtblistreturnc                    U=(       d    / nU R                  S5      u  pgnU SU SU 3n	[        R                  R                  X&Xy U 35      n
U SU U 3n[        R                  R                  XXgU5      n/ n[	        U5       nU He  nUR                  5       R                  SS5      u  nnX:X  d  M-  UR                  5        H$  nUU;   d  M  UU;  d  M  UR                  U5        M&       O   [        SU	 35      e S S S 5        U
[        W[        U5      [        U5      [        U5      U4$ ! , (       d  f       N6= f)N-    zTranslation not found for )
splitr   r   r   openstripappendFileNotFoundErrorSAMPLE_RATEint)r(   r   r)   r*   r+   r,   
speaker_id
chapter_idutterance_idfileid_audiofilepath	file_textuttblistftlinefileid_text
transcriptwords                     r%   _get_librispeech_metadatarE   .   sC    KRE+1<<+<(JL !\:,a~>Lww||F
nYK<XYH ,a
|G95IT:9MIH	iBD&*jjl&8&8a&@#K*&,,.Du}X)= - /   $&@$OPP  
 	JJL  
s   ,D05D0D0'D00
D>c                       \ rS rSrSrSrSr\\SS4S\	\
\4   S\
S	\
S
\S\\
   SS4S jjrS\S\\
\\
\\\4   4S jrS\S\\\\
\\\4   4S jrS\4S jrSrg)LibriSpeechBiasingT   aH  *LibriSpeech* :cite:`7178964` dataset with prefix-tree construction and biasing support.

Args:
    root (str or Path): Path to the directory where the dataset is found or downloaded.
    url (str, optional): The URL to download the dataset from,
        or the type of the dataset to dowload.
        Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
        ``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
        ``"train-other-500"``. (default: ``"train-clean-100"``)
    folder_in_archive (str, optional):
        The top-level directory of the dataset. (default: ``"LibriSpeech"``)
    download (bool, optional):
        Whether to download the dataset if it is not found at root path. (default: ``False``).
    blist (list, optional):
        The list of biasing words (default: ``[]``).
z
.trans.txtz.flacFNr   r   folder_in_archivedownloadr,   r-   c                 Z   X l         U[        ;  a  [        SU S[         S35      e[        R                  " U5      n[        R
                  R                  X5      U l        [        R
                  R                  XU5      U l        [        R
                  R                  U R                  5      (       d,  U(       a  [        X5        O[        SU R                   S35      e[        S [        U R                  5      R                  SU R                  -   5       5       5      U l        XPl        g )NzInvalid url 'z' given; please provide one of .zDataset not found at z5. Please set `download=True` to download the dataset.c              3   L   #    U  H  n[        UR                  5      v   M     g 7fN)strstem).0ps     r%   	<genexpr>.LibriSpeechBiasing.__init__.<locals>.<genexpr>   s     d3cac!&&kk3cs   "$z*/*/*)_url_DATA_SUBSETS
ValueErrorr   fspathr   r   _archive_pathisdirr&   RuntimeErrorsortedr   glob
_ext_audio_walkerr,   )selfr   r   rI   rJ   r,   s         r%   __init__LibriSpeechBiasing.__init__i   s     	m#}SE1PQ^P__`abbyyT=WW\\$3?
ww}}TZZ((%d0"+DJJ<7lm  d4

3C3H3HSWSbSbIb3cdd
r'   nc                     U R                   U   n[        X R                  U R                  U R                  U R
                  U R                  5      $ )a  Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
but otherwise returns the same fields as :py:func:`__getitem__`.

Args:
    n (int): The index of the sample to be loaded

Returns:
    Tuple of the following items;

    str:
        Path to audio
    int:
        Sample rate
    str:
        Transcript
    int:
        Speaker ID
    int:
        Chapter ID
    int:
        Utterance ID
    list:
        List of biasing words in the utterance
)r`   rE   rY   rU   r_   _ext_txtr,   )ra   rd   r(   s      r%   get_metadataLibriSpeechBiasing.get_metadata   s>    2 a(		4??\`\i\ikokukuvvr'   c                 p    U R                  U5      n[        U R                  US   US   5      nU4USS -   $ )ar  Load the n-th sample from the dataset.

Args:
    n (int): The index of the sample to be loaded

Returns:
    Tuple of the following items;

    Tensor:
        Waveform
    int:
        Sample rate
    str:
        Transcript
    int:
        Speaker ID
    int:
        Chapter ID
    int:
        Utterance ID
    list:
        List of biasing words in the utterance
r   r1   N)rg   r   rY   )ra   rd   metadatawaveforms       r%   __getitem__LibriSpeechBiasing.__getitem__   sA    0 $$Q'!$--!hqkJ{Xab\))r'   c                 ,    [        U R                  5      $ rN   )lenr`   )ra   s    r%   __len__LibriSpeechBiasing.__len__   s    4<<  r'   )rY   rZ   rU   r`   r,   )__name__
__module____qualname____firstlineno____doc__rf   r_   URLFOLDER_IN_ARCHIVEr   rO   r   boolr   rb   r8   r   rg   r   rl   rp   __static_attributes__ r'   r%   rG   rG   T   s    " HJ
 !2CI  	
  Cy 
6wc weCc3S,H&I w8*S *U63S#s+J%K *8! !r'   rG   )r   pathlibr   typingr   r   r   torchr   torch.utils.datar   torchaudio._internalr	   torchaudio.datasets.utilsr
   r   rw   rx   r7   rV   r   r&   rO   r8   rE   rG   r{   r'   r%   <module>r      s    	  % %  $ 5 B!  =<~== CE CE CE

###$'#47#BE#NRSVi#
3S#sC'(#Li! i!r'   