
    hg                         S SK r S SKrS SKJr  S SKJrJrJr  S SKrS SK	J
r
  S SKJr  S SKJrJr  SrSrS	r/ S
Qr " S S\
5      rS\S\S\4S jrg)    N)Path)OptionalTupleUnion)Dataset)download_url_to_file)_extract_tar_load_waveformz6https://speech.fit.vutbr.cz/files/quesst14Database.tgzi@  @4f869e06bc066bbe9c5dde31dbd3909a0870d70291110ebbb38878dcbc2fc5e4)albanianbasqueczech	nnenglishromanianslovakc                       \ rS rSrSr  SS\\\4   S\S\\   S\	SS4
S	 jjr
S
\S\\\\4   4S jrS
\S\\R                  \\4   4S jrS\4S jrSrg)QUESST14   ak  *QUESST14* :cite:`Mir2015QUESST2014EQ` dataset.

Args:
    root (str or Path): Root directory where the dataset's top level directory is found
    subset (str): Subset of the dataset to use. Options: [``"docs"``, ``"dev"``, ``"eval"``].
    language (str or None, optional): Language to get dataset for.
        Options: [``None``, ``albanian``, ``basque``, ``czech``, ``nnenglish``, ``romanian``, ``slovak``].
        If ``None``, dataset consists of all languages. (default: ``"nnenglish"``)
    download (bool, optional): Whether to download the dataset if it is not found at root path.
        (default: ``False``)
rootsubsetlanguagedownloadreturnNc                 n   US;  a  [        S5      eUb%  U[        ;  a  [        S[        [        5       35      e[        R                  " U5      n[        R
                  R                  [        5      n[        R
                  R                  X5      nUR                  SS5      S   n[        R
                  R                  X5      U l
        [        R
                  R                  U R                  5      (       dT  [        R
                  R                  U5      (       d%  U(       d  [        S5      e[        [        U[        S9  [!        Xa5        US	:X  a  [#        U R                  US
5      U l        g US:X  a  [#        U R                  US5      U l        g US:X  a  [#        U R                  US5      U l        g g )N)docsdevevalz/`subset` must be one of ['docs', 'dev', 'eval']z"`language` must be None or one of .   r   z9Dataset not found. Please use `download=True` to download)hash_prefixr   zlanguage_key_utterances.lstr   zlanguage_key_dev.lstr   zlanguage_key_eval.lst)
ValueError
_LANGUAGESstrosfspathpathbasenameURLjoinrsplit_pathisdirisfileRuntimeErrorr   	_CHECKSUMr	   filter_audio_pathsdata)selfr   r   r   r   r'   archives          T/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/datasets/quesst14.py__init__QUESST14.__init__&   sD    00NOOHJ$>A#j/ARSTT yy77##C('',,t.??3*1-WW\\$1
ww}}TZZ((77>>'**&'bcc$S'yI'V*4::xA^_DIu_*4::xAWXDIv*4::xAXYDI     nc                     U R                   U   n[        R                  R                  X R                  5      nU[
        UR                  S5      R                  4$ )aW  Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
but otherwise returns the same fields as :py:func:`__getitem__`.

Args:
    n (int): The index of the sample to be loaded

Returns:
    Tuple of the following items;

    str:
        Path to audio
    int:
        Sample rate
    str:
        File name
 )r1   r$   r&   relpathr+   SAMPLE_RATEwith_suffixname)r2   r8   
audio_pathr;   s       r4   get_metadataQUESST14.get_metadataJ   sE    " YYq\
''//*jj9Z%;%;B%?%D%DDDr7   c                 p    U R                  U5      n[        U R                  US   US   5      nU4USS -   $ )zLoad the n-th sample from the dataset.

Args:
    n (int): The index of the sample to be loaded

Returns:
    Tuple of the following items;

    Tensor:
        Waveform
    int:
        Sample rate
    str:
        File name
r      N)r@   r
   r+   )r2   r8   metadatawaveforms       r4   __getitem__QUESST14.__getitem___   sA      $$Q'!$**hqk8A;G{Xab\))r7   c                 ,    [        U R                  5      $ )N)lenr1   )r2   s    r4   __len__QUESST14.__len__s   s    499~r7   )r+   r1   )r   F)__name__
__module____qualname____firstlineno____doc__r   r#   r   r   boolr5   intr   r@   torchTensorrF   rJ   __static_attributes__ r7   r4   r   r      s    
  #."ZCI"Z "Z 3-	"Z
 "Z 
"ZHEc EeCcM&: E**S *U5<<c+A%B *( r7   r   r&   r   lst_namec                 4   / n[        U 5      n [        U S-  U-  5       nU HX  nUR                  5       R                  5       u  pgUb  Xq:w  a  M-  [        R
                  " SSU5      nUR                  X-  5        MZ     SSS5        U$ ! , (       d  f       U$ = f)z+Extract audio paths for the given language.scoringNz^.*?\/r:   )r   openstripsplitresubappend)r&   r   rW   audio_pathsfliner?   langs           r4   r0   r0   w   s     K:D	dY)	*aD#zz|113J#(8	2z:Jt01  
+  
+	* s   AB
B)r$   r]   pathlibr   typingr   r   r   rS   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr	   r
   r(   r<   r/   r"   r   r#   r0   rV   r7   r4   <module>ri      sf    	 	  ) )  $ 5 B ?N	
[w [|
 r7   