
    h!              
           S SK r S SKJr  S SKJrJr  S SKrS SKJr  S SK	J
r
  S SKJr  S SKJr  SS	S
SS/ SQSS.SSSSS/ SQSS.SSSSS/ SQSS.S.r " S S\
5      rg)    N)Path)TupleUnion)Tensor)Dataset)download_url_to_file)_extract_tarTEDLIUM_release1z:http://www.openslr.org/resources/7/TEDLIUM_release1.tar.gz@30301975fd8c5cac4040c261c0852f57cfa8adbbad2ce78e77e4986957445f27 train)r   testdevzTEDLIUM.150K.dic)folder_in_archiveurlchecksum	data_pathsubsetsupported_subsetsdictTEDLIUM_release2z;http://www.openslr.org/resources/19/TEDLIUM_release2.tar.gz@93281b5fcaaae5c88671c9d000b443cb3c7ea3499ad12010b3934ca41a7b9c58zTEDLIUM.152k.diczTEDLIUM_release-3z9http://www.openslr.org/resources/51/TEDLIUM_release-3.tgz@ad1e454d14d1ad550bc2564c462d87c7a7ec83d4dc2b9210f22ab4973b9eccdbzdata/)release1release2release3c                       \ rS rSrSr    SS\\\4   S\S\S\S\SS	4S
 jjr	S\S\
S\S\\\
\\
\
\
4   4S jrSS\S\S\S\
S\\
/4
S jjrS\
S\\\
\\
\
\
4   4S jrS\
4S jr\S 5       rSrg	)TEDLIUM+   a  *Tedlium* :cite:`rousseau2012tedlium` dataset (releases 1,2 and 3).

Args:
    root (str or Path): Path to the directory where the dataset is found or downloaded.
    release (str, optional): Release version.
        Allowed values are ``"release1"``, ``"release2"`` or ``"release3"``.
        (default: ``"release1"``).
    subset (str, optional): The subset of dataset to use. Valid options are ``"train"``, ``"dev"``,
        and ``"test"``. Defaults to ``"train"``.
    download (bool, optional):
        Whether to download the dataset if it is not found at root path. (default: ``False``).
    audio_ext (str, optional): extension for audio file (default: ``".sph"``)
rootreleaser   download	audio_extreturnNc                 z  ^ XPl         U[        R                  5       ;   a.  [        U   S   n[        U   S   nU(       a  UO[        U   S   nO-[        SR	                  U[        R                  5       5      5      eU[        U   S   ;  a%  [        SR	                  U[        U   S   5      5      e[
        R                  " U5      n[
        R                  R                  U5      n[
        R                  R                  X5      n	UR                  S5      S   nUS	:X  a]  US
:X  a0  [
        R                  R                  X[        U   S   5      U l        OW[
        R                  R                  XSU5      U l        O0[
        R                  R                  X[        U   S   U5      U l        U(       at  [
        R                  R                  U R                  5      (       dE  [
        R                  R                  U	5      (       d  [        U   S   n
[        XyU
S9  [        U	5        OG[
        R                  R!                  U R                  5      (       d  [        SU R                   S35      e/ U l        [
        R                  R                  U R                  S5      n[%        [
        R&                  " U5      5       H  mTR)                  S5      (       d  M  [
        R                  R                  U R                  ST5      n[+        U5       n[-        UR/                  5       5      nTR1                  SS5      mU R"                  R3                  U4S j[5        U5       5       5        S S S 5        M     [
        R                  R                  X[        U   S   5      U l        S U l        g ! , (       d  f       M  = f)Nr   r   r   zFThe release {} does not match any of the supported tedlium releases{} r   zDThe subset {} does not match any of the supported tedlium subsets{} .r   r   r   r   legacyr   )hash_prefixz	The path zT doesn't exist. Please check the ``root`` path or set `download=True` to download itstm.stmr   c              3   ,   >#    U  H	  nTU4v   M     g 7f)N ).0linefiles     S/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/datasets/tedlium.py	<genexpr>#TEDLIUM.__init__.<locals>.<genexpr>~   s     )L844,8s   r   )
_ext_audio_RELEASE_CONFIGSkeysRuntimeErrorformatosfspathpathbasenamejoinsplit_pathisdirisfiler   r	   exists	_filelistsortedlistdirendswithopenlen	readlinesreplaceextendrange
_dict_path_phoneme_dict)selfr    r!   r   r"   r#   r   r   r;   archiver   stm_pathflr/   s                 @r0   __init__TEDLIUM.__init__:   s    $&++-- 0 9:M N"7+E2C%V+;G+DX+NF X__$))+  )'23FGGV]]$W-.AB  yy77##C('',,t.>>#&q)j  WW\\$CST[C\]hCij
WW\\$8VT
d?OPW?XYd?egmnDJ77==,,ww~~g../8DH(8LW%77>>$**--"

| ,[ [  77<<

E22::h/0D}}V$$77<<

E4@(^qAKKM*A<<3DNN)))L58)LL $^ 1 '',,t@PQX@YZ`@ab! $^s   AN++
N:	fileidr.   r:   c                 v   [         R                  R                  USU5      n[        US-   5       nUR	                  5       U   nUR                  SS5      u  pxppnSSS5        [         R                  R                  USU5      nU R                  XR                  -   W
WS9u  pXWWW	W4$ ! , (       d  f       NU= f)a  Loads a TEDLIUM dataset sample given a file name and corresponding sentence name.

Args:
    fileid (str): File id to identify both text and audio files corresponding to the sample
    line (int): Line identifier for the sample inside the text file
    path (str): Dataset root path

Returns:
    (Tensor, int, str, int, int, int):
    ``(waveform, sample_rate, transcript, talk_id, speaker_id, identifier)``
r)   r*       Nsph)
start_timeend_time)r8   r:   r<   rF   rH   r=   _load_audior3   )rN   rU   r.   r:   transcript_pathrQ   
transcripttalk_id_
speaker_idrZ   r[   
identifier	wave_pathwaveformsample_rates                   r0   _load_tedlium_itemTEDLIUM._load_tedlium_item   s     '',,tUF;/F*+qt,JS]ScScdgijSkPG
j , GGLLuf5	 $ 0 0__1LYcnv 0 wz7J
SS ,+s   +B**
B8rZ   r[   re   c                     [        [        U5      U-  5      n[        [        U5      U-  5      nX#U-
  S.n[        R                  " U40 UD6$ )a  Default load function used in TEDLIUM dataset, you can overwrite this function to customize functionality
and load individual sentences from a full ted audio talk file.

Args:
    path (str): Path to audio file
    start_time (int): Time in seconds where the sample sentence stars
    end_time (int): Time in seconds where the sample sentence finishes
    sample_rate (float, optional): Sampling rate

Returns:
    [Tensor, int]: Audio tensor representation and sample rate
)frame_offset
num_frames)intfloat
torchaudioload)rN   r:   rZ   r[   re   kwargss         r0   r\   TEDLIUM._load_audio   sL     z*[89
uX45",z<QRt.v..    nc                 \    U R                   U   u  p#U R                  X#U R                  5      $ )a4  Load the n-th sample from the dataset.

Args:
    n (int): The index of the sample to be loaded

Returns:
    Tuple of the following items;

    Tensor:
        Waveform
    int:
        Sample rate
    str:
        Transcript
    int:
        Talk ID
    int:
        Speaker ID
    int:
        Identifier
)rB   rf   r>   )rN   rr   rU   r.   s       r0   __getitem__TEDLIUM.__getitem__   s+    , ~~a(&&vTZZ@@rq   c                 ,    [        U R                  5      $ )znTEDLIUM dataset custom function overwritting len default behaviour.

Returns:
    int: TEDLIUM dataset length
)rG   rB   )rN   s    r0   __len__TEDLIUM.__len__   s     4>>""rq   c                 j   U R                   (       dx  0 U l         [        U R                  SSS9 nUR                  5        H?  nUR	                  5       R                  5       n[        USS 5      U R                   US   '   MA     SSS5        U R                   R                  5       $ ! , (       d  f       N(= f)zsdict[str, tuple[str]]: Phonemes. Mapping from word to tuple of phonemes.
Note that some words have empty phonemes.
rzutf-8)encoding   Nr   )rM   rF   rL   rH   stripr=   tuplecopy)rN   rQ   r.   contents       r0   phoneme_dictTEDLIUM.phoneme_dict   s     !!!#DdoosW=KKMD"jjl002G5:712;5GD&&wqz2 * > !!&&((	 >=s   AB$$
B2)rL   r3   rB   r>   rM   )r   r   Fz.sph)i>  )__name__
__module____qualname____firstlineno____doc__r   strr   boolrS   rk   r   r   rf   rl   r\   rt   rw   propertyr   __static_attributes__r,   rq   r0   r   r   +   s   " "G"CIG" G" 	G"
 G" G" 
G"RT TC Ts TuVUXZ]_bdgilMlGm T,/ / /% /VY /gmorfs /(AS AU63S#s+J%K A2# # ) )rq   r   )r8   pathlibr   typingr   r   rm   torchr   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr	   r4   r   r,   rq   r0   <module>r      s    	     $ 5 2
 0KV5" 0LV5" 1JV5"' >o)g o)rq   