
    h
                         S SK r S SKrS SKJr  S SKJrJrJrJr  S SK	r	S SK
Jr  S SKJr  S\\   S\\   S\S	\S
\S\\\\\\4   4   4S jr " S S\5      rg)    N)Path)DictListTupleUnion)Tensor)Datasetlineheaderpathfolder_audio	ext_audioreturnc                    US   S:w  a  [        SUS    35      eU S   n[        R                  R                  X#U5      nUR	                  U5      (       d  Xd-  n[
        R                  " U5      u  px[        [        X5      5      n	XxU	4$ )N   r   z)expect `header[1]` to be 'path', but got )	
ValueErrorosr   joinendswith
torchaudioloaddictzip)
r
   r   r   r   r   fileidfilenamewaveformsample_ratedics
             W/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/datasets/commonvoice.pyload_commonvoice_itemr       s     ayFDVAYKPQQ!WFww||D7HY''&OOH5H
s6 
!C#%%    c            	       ~    \ rS rSrSrSrSrSrSS\\	\
4   S\	SS	4S
 jjrS\S\\\\\	\	4   4   4S jrS\4S jrSrg	)COMMONVOICE   a  *CommonVoice* :cite:`ardila2020common` dataset.

Args:
    root (str or Path): Path to the directory where the dataset is located.
         (Where the ``tsv`` file is present.)
    tsv (str, optional):
        The name of the tsv file used to construct the metadata, such as
        ``"train.tsv"``, ``"test.tsv"``, ``"dev.tsv"``, ``"invalidated.tsv"``,
        ``"validated.tsv"`` and ``"other.tsv"``. (default: ``"train.tsv"``)
z.txtz.mp3clipsroottsvr   Nc                 b   [         R                  " U5      U l        [         R                  R	                  U R                  U5      U l        [        U R
                  S5       n[        R                  " USS9n[        U5      U l
        [        U5      U l        S S S 5        g ! , (       d  f       g = f)Nr	)	delimiter)r   fspath_pathr   r   _tsvopencsvreadernext_headerlist_walker)selfr&   r'   tsv_walkers        r   __init__COMMONVOICE.__init__.   so     YYt_
GGLLS1	$))S!TZZ5F<DL<DL "!!s   !6B  
B.nc                     U R                   U   n[        X R                  U R                  U R                  U R
                  5      $ )a  Load the n-th sample from the dataset.

Args:
    n (int): The index of the sample to be loaded

Returns:
    Tuple of the following items;

    Tensor:
        Waveform
    int:
        Sample rate
    Dict[str, str]:
        Dictionary containing the following items from the corresponding TSV file;

        * ``"client_id"``
        * ``"path"``
        * ``"sentence"``
        * ``"up_votes"``
        * ``"down_votes"``
        * ``"age"``
        * ``"gender"``
        * ``"accent"``
)r5   r    r3   r-   _folder_audio
_ext_audio)r6   r;   r
   s      r   __getitem__COMMONVOICE.__getitem__9   s7    2 ||A$T<<TEWEWY]YhYhiir!   c                 ,    [        U R                  5      $ )N)lenr5   )r6   s    r   __len__COMMONVOICE.__len__U   s    4<<  r!   )r3   r-   r.   r5   )z	train.tsv)__name__
__module____qualname____firstlineno____doc___ext_txtr>   r=   r   strr   r9   intr   r   r   r?   rC   __static_attributes__ r!   r   r#   r#      sq    	 HJM	(U39- 	(C 	($ 	(jS jU63S#X+F%G j8! !r!   r#   )r0   r   pathlibr   typingr   r   r   r   r   torchr   torch.utils.datar	   rK   rL   r    r#   rN   r!   r   <module>rS      sz    
 	  + +   $&
s)&!#Y&.1&AD&QT&
63S#X&'&&8!' 8!r!   