
    h<              	          S SK r S SKJr  S SKJrJrJrJrJrJ	r	  S SK
r
S SK
Jr  S SKJr  S SKJr  S SKJrJr  S SKJrJr  S	S
KJr  S	SKJr  / rSr " S S\R8                  5      r " S S\R8                  5      r " S S\
R>                  R@                  \RB                  5      r" " S S\
R>                  R@                  \RB                  5      r# " S S5      r$ " S S5      r%\ " S S5      5       r&\ " S S5      5       r' " S S5      r(\ " S S \'\&\$\5      5       r)\ " S! S"\'\&\%\5      5       r*\ " S# S$\(\&\$\5      5       r+\ " S% S&\(\&\%\5      5       r,\+" S'\RZ                  " S(S)9S*9r.S+\.l/        \," S,\RZ                  " S-S)9S*9r0S.\0l/        \)" S/\RZ                  " S(S)9S0\Rb                  " 5       S19r2S2\2l/        \*" S3\RZ                  " S-S)9S0\Rb                  " 5       S19r3S4\3l/        g)5    N)	dataclass)AnyDictListOptionalTupleUnion)Tensor)load_state_dict_from_url)mu_law_decoding)	Tacotron2WaveRNN)
GriffinLimInverseMelScale   )utils)Tacotron2TTSBundlez.https://download.pytorch.org/torchaudio/modelsc                   d   ^  \ rS rSrU 4S jr\S 5       rS\\\	\   4   S\
\\4   4S jrSrU =r$ )_EnglishCharProcessor   c                    > [         TU ]  5         [        R                  " 5       U l        [        U R                  5       VVs0 s H  u  pX!_M	     snnU l        g s  snnf N)super__init__r   
_get_chars_tokens	enumerate_mapping)selfis	__class__s      V/var/www/auris/envauris/lib/python3.13/site-packages/torchaudio/pipelines/_tts/impl.pyr   _EnglishCharProcessor.__init__   sF    '')*3DLL*AB*A$!*ABBs   Ac                     U R                   $ r   r   r   s    r#   tokens_EnglishCharProcessor.tokens       ||    textsreturnc           	         [        U[        5      (       a  U/nU VVs/ s HA  o"R                  5        Vs/ s H#  o3U R                  ;   d  M  U R                  U   PM%     snPMC     nnn[        R
                  " U5      $ s  snf s  snnf r   )
isinstancestrlowerr   r   
_to_tensor)r   r,   tcindicess        r#   __call___EnglishCharProcessor.__call__#   so    eS!!GEX]^X]STggiNi;M$DMM!$iNX]^(( O^s   BA?A?B?B)r   r   __name__
__module____qualname____firstlineno__r   propertyr(   r	   r0   r   r   r
   r6   __static_attributes____classcell__r"   s   @r#   r   r      sK    C
  )eCcN3 )ffn8M ) )r+   r   c                   l   ^  \ rS rSrSS.U 4S jjr\S 5       rS\\\	\   4   S\
\\4   4S jrS	rU =r$ )
_EnglishPhoneProcessor*   N	dl_kwargsc                  > [         TU ]  5         [        R                  " 5       U l        [        U R                  5       VVs0 s H  u  p#X2_M	     snnU l        [        R                  " SUS9U l        SU l	        g s  snnf )Nzen_us_cmudict_forward.ptrD   z(\[[A-Z]+?\]|[_!'(),.:;? -]))
r   r   r   _get_phonesr   r   r   _load_phonemizer_phonemizer_pattern)r   rE   r    pr"   s       r#   r   _EnglishPhoneProcessor.__init__+   sg    ((**3DLL*AB*A$!*AB 112LXab7 Cs   A;c                     U R                   $ r   r&   r'   s    r#   r(   _EnglishPhoneProcessor.tokens2   r*   r+   r,   r-   c           	         [        U[        5      (       a  U/n/ nU R                  USS9 Hx  n[        R                  " U R
                  U5       Vs/ s H  n[        R                  " SSU5      PM     nnUR                  U Vs/ s H  o`R                  U   PM     sn5        Mz     [        R                  " U5      $ s  snf s  snf )Nen_us)langz[\[\]] )r/   r0   rI   refindallrJ   subappendr   r   r2   )r   r,   r5   phonesrretrK   s          r#   r6   _EnglishPhoneProcessor.__call__6   s    eS!!GE&&u7&;F57ZZv5VW5V266)R+5VCWNNc:cMM!,c:; < (( X:s   "B=C
)r   rJ   rI   r   r8   r@   s   @r#   rB   rB   *   sR    $( 8 8  	)eCcN3 	)ffn8M 	) 	)r+   rB   c                   X   ^  \ rS rSrSS\S\\   4U 4S jjjr\S 5       r	S	S jr
SrU =r$ )
_WaveRNNVocoderG   modelmin_level_dbc                 H   > [         TU ]  5         SU l        Xl        X l        g )N"V  )r   r   _sample_rate_model_min_level_db)r   r^   r_   r"   s      r#   r   _WaveRNNVocoder.__init__H   s!    !)r+   c                     U R                   $ r   rb   r'   s    r#   sample_rate_WaveRNNVocoder.sample_rateN          r+   c                    [         R                  " U5      nS[         R                  " [         R                  " USS95      -  nU R                  b2  U R                  U-
  U R                  -  n[         R                  " USSS9nU R
                  R                  X5      u  p2[        R                  " X0R
                  R                  5      n[        X0R
                  R                  5      nUR                  S5      nX24$ )N   gh㈵>)minr   r   )rm   max)torchexplog10clamprd   rc   inferr   _unnormalize_waveformn_bitsr   	n_classessqueeze)r   mel_speclengthswaveforms       r#   forward_WaveRNNVocoder.forwardR   s    99X&EKKd$CDD)**X59K9KKH{{8:H KK--h@..x9K9KL"8[[-B-BC##A&  r+   )rd   rc   rb   )ir   )r9   r:   r;   r<   r   r   floatr   r=   rh   r{   r>   r?   r@   s   @r#   r\   r\   G   s=    *g *Xe_ * * ! !
! 
!r+   r\   c                   B   ^  \ rS rSrU 4S jr\S 5       rSS jrSrU =r	$ )_GriffinLimVocoder_   c           
         > [         TU ]  5         SU l        [        SSU R                  SSSSS9U l        [        SS	S
SS9U l        g )Nra   i  P   g        g     @@slaney)n_stftn_melsrh   f_minf_max	mel_scalenormi   r      )n_fftpower
hop_length
win_length)r   r   rb   r   rh   _inv_melr   _griffin_lim)r   r"   s    r#   r   _GriffinLimVocoder.__init__`   sX    !'!((
 '	
r+   c                     U R                   $ r   rg   r'   s    r#   rh   _GriffinLimVocoder.sample_rates   rj   r+   c                    [         R                  " U5      nUR                  5       R                  5       R	                  S5      nU R                  U5      nUR                  5       R	                  S5      nU R                  U5      nXB4$ )NTF)ro   rp   clonedetachrequires_grad_r   r   )r   rx   ry   spec	waveformss        r#   r{   _GriffinLimVocoder.forwardw   sm    99X&>>#**,;;DA}}X&{{}++E2%%d+	!!r+   )r   r   rb   r   )
r9   r:   r;   r<   r   r=   rh   r{   r>   r?   r@   s   @r#   r   r   _   s&    
& ! !" "r+   r   c                   6    \ rS rSrS\R
                  4S jrSrg)
_CharMixin   r-   c                     [        5       $ r   )r   r'   s    r#   get_text_processor_CharMixin.get_text_processor   s    $&&r+    Nr9   r:   r;   r<   r   TextProcessorr   r>   r   r+   r#   r   r      s    '$6$D$D 'r+   r   c                   >    \ rS rSrSS.S\R
                  4S jjrSrg)_PhoneMixin   NrD   r-   c                    [        US9$ NrD   )rB   )r   rE   s     r#   r   _PhoneMixin.get_text_processor   s    %	::r+   r   r   r   r+   r#   r   r      s    .2 ;7I7W7W ; ;r+   r   c                   J    \ rS rSr% \\S'   \\\4   \S'   SS.S\4S jjr	Sr
g)	_Tacotron2Mixin   _tacotron2_path_tacotron2_paramsNrD   r-   c                    [        S0 U R                  D6n[         SU R                   3nUc  0 OUn[	        U40 UD6nUR                  U5        UR                  5         U$ N/r   )r   r   	_BASE_URLr   r   load_state_dictevalr   rE   r^   url
state_dicts        r#   get_tacotron2_Tacotron2Mixin.get_tacotron2   sc    3D2231T1123#+B	-c?Y?
j)

r+   r   )r9   r:   r;   r<   r0   __annotations__r   r   r   r   r>   r   r+   r#   r   r      s*    CH~%)- )  r+   r   c                   \    \ rS rSr% \\   \S'   \\\\4      \S'   SS.S jr	SS.S jr
Srg)	_WaveRNNMixin   _wavernn_path_wavernn_paramsNrD   c                6    U R                  US9n[        U5      $ r   )_get_wavernnr\   )r   rE   wavernns      r#   get_vocoder_WaveRNNMixin.get_vocoder   s     ##i#8w''r+   c                    [        S0 U R                  D6n[         SU R                   3nUc  0 OUn[	        U40 UD6nUR                  U5        UR                  5         U$ r   )r   r   r   r   r   r   r   r   s        r#   r   _WaveRNNMixin._get_wavernn   sc    /$../1T//01#+B	-c?Y?
j)

r+   r   )r9   r:   r;   r<   r   r0   r   r   r   r   r   r>   r   r+   r#   r   r      s6    C= d38n--'+ ( )-  r+   r   c                       \ rS rSrS rSrg)_GriffinLimMixin   c                     [        5       $ r   )r   )r   _s     r#   r   _GriffinLimMixin.get_vocoder   s    !##r+   r   N)r9   r:   r;   r<   r   r>   r   r+   r#   r   r      s    $r+   r   c                       \ rS rSrSrg)_Tacotron2WaveRNNCharBundle   r   Nr9   r:   r;   r<   r>   r   r+   r#   r   r          r+   r   c                       \ rS rSrSrg)_Tacotron2WaveRNNPhoneBundle   r   Nr   r   r+   r#   r   r      r   r+   r   c                       \ rS rSrSrg)_Tacotron2GriffinLimCharBundle   r   Nr   r   r+   r#   r   r      r   r+   r   c                       \ rS rSrSrg)_Tacotron2GriffinLimPhoneBundle   r   Nr   r   r+   r#   r   r      r   r+   r   z5tacotron2_english_characters_1500_epochs_ljspeech.pth&   )	n_symbols)r   r   a  Character-based TTS pipeline with :py:class:`~torchaudio.models.Tacotron2` trained on *LJSpeech* :cite:`ljspeech17` for 1,500 epochs, and
:py:class:`~torchaudio.transforms.GriffinLim` as vocoder.

The text processor encodes the input texts character-by-character.

You can find the training script `here <https://github.com/pytorch/audio/tree/main/examples/pipeline_tacotron2>`__.
The default parameters were used.

Please refer to :func:`torchaudio.pipelines.Tacotron2TTSBundle` for the usage.
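
A minimal usage sketch (illustrative; it assumes :py:meth:`~torchaudio.models.Tacotron2.infer` for
spectrogram generation, and ``get_tacotron2`` downloads the pretrained weights on first use):

.. code-block:: python

   import torchaudio

   bundle = torchaudio.pipelines.TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH
   processor = bundle.get_text_processor()  # character-based text encoder
   tacotron2 = bundle.get_tacotron2()       # text -> mel spectrogram
   vocoder = bundle.get_vocoder()           # mel spectrogram -> waveform (Griffin-Lim)

   tokens, lengths = processor("Hello world! T T S stands for Text to Speech!")
   spec, spec_lengths, _ = tacotron2.infer(tokens, lengths)
   waveforms, _ = vocoder(spec, spec_lengths)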

Example - "Hello world! T T S stands for Text to Speech!"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>

Example - "The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired,"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH_v2.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_CHAR_LJSPEECH_v2.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>
"""
TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH = _Tacotron2GriffinLimPhoneBundle(
    _tacotron2_path="tacotron2_english_phonemes_1500_epochs_ljspeech.pth",
    _tacotron2_params=utils._get_taco_params(n_symbols=96),
)
TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH.__doc__ = """Phoneme-based TTS pipeline with :py:class:`~torchaudio.models.Tacotron2` trained on *LJSpeech* :cite:`ljspeech17` for 1,500 epochs and
:py:class:`~torchaudio.transforms.GriffinLim` as vocoder.

The text processor encodes the input texts based on phonemes.
It uses `DeepPhonemizer <https://github.com/as-ideas/DeepPhonemizer>`__ to convert
graphemes to phonemes.
The model (*en_us_cmudict_forward*) was trained on
`CMUDict <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>`__.

You can find the training script `here <https://github.com/pytorch/audio/tree/main/examples/pipeline_tacotron2>`__.
The text processor is set to the *"english_phonemes"*.

Please refer to :func:`torchaudio.pipelines.Tacotron2TTSBundle` for the usage.
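
A minimal usage sketch (illustrative; ``get_text_processor`` accepts an optional ``dl_kwargs`` argument
because it downloads the DeepPhonemizer checkpoint, *en_us_cmudict_forward*, on first use):

.. code-block:: python

   import torchaudio

   bundle = torchaudio.pipelines.TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH
   processor = bundle.get_text_processor()  # grapheme -> phoneme encoder
   tacotron2 = bundle.get_tacotron2()
   vocoder = bundle.get_vocoder()

   tokens, lengths = processor("Hello world! T T S stands for Text to Speech!")
   spec, spec_lengths, _ = tacotron2.infer(tokens, lengths)
   waveforms, _ = vocoder(spec, spec_lengths)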

Example - "Hello world! T T S stands for Text to Speech!"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>

Example - "The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired,"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH_v2.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_GRIFFINLIM_PHONE_LJSPEECH_v2.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>

"""
TACOTRON2_WAVERNN_CHAR_LJSPEECH = _Tacotron2WaveRNNCharBundle(
    _tacotron2_path="tacotron2_english_characters_1500_epochs_wavernn_ljspeech.pth",
    _tacotron2_params=utils._get_taco_params(n_symbols=38),
    _wavernn_path="wavernn_10k_epochs_8bits_ljspeech.pth",
    _wavernn_params=utils._get_wrnn_params(),
)
TACOTRON2_WAVERNN_CHAR_LJSPEECH.__doc__ = """Character-based TTS pipeline with :py:class:`~torchaudio.models.Tacotron2` trained on *LJSpeech* :cite:`ljspeech17` for 1,500 epochs and :py:class:`~torchaudio.models.WaveRNN` vocoder trained on 8-bit depth waveform of *LJSpeech* :cite:`ljspeech17` for 10,000 epochs.

The text processor encodes the input texts character-by-character.

You can find the training script for Tacotron2 `here <https://github.com/pytorch/audio/tree/main/examples/pipeline_tacotron2>`__.
The following parameters were used: ``win_length=1100``, ``hop_length=275``, ``n_fft=2048``,
``mel_fmin=40``, and ``mel_fmax=11025``.

You can find the training script for WaveRNN `here <https://github.com/pytorch/audio/tree/main/examples/pipeline_wavernn>`__.

Please refer to :func:`torchaudio.pipelines.Tacotron2TTSBundle` for the usage.
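
A minimal usage sketch (illustrative; ``get_tacotron2`` and ``get_vocoder`` download the pretrained
weights on first use, and :func:`torchaudio.save` is used here only to show the output sample rate,
which is exposed as ``vocoder.sample_rate``):

.. code-block:: python

   import torchaudio

   bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_CHAR_LJSPEECH
   processor = bundle.get_text_processor()
   tacotron2 = bundle.get_tacotron2()
   vocoder = bundle.get_vocoder()  # WaveRNN-based vocoder

   tokens, lengths = processor("Hello world! T T S stands for Text to Speech!")
   spec, spec_lengths, _ = tacotron2.infer(tokens, lengths)
   waveforms, _ = vocoder(spec, spec_lengths)
   torchaudio.save("output.wav", waveforms, sample_rate=vocoder.sample_rate)  # placeholder path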

Example - "Hello world! T T S stands for Text to Speech!"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_CHAR_LJSPEECH.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_CHAR_LJSPEECH.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>

Example - "The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired,"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_CHAR_LJSPEECH_v2.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_CHAR_LJSPEECH_v2.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>
"""
TACOTRON2_WAVERNN_PHONE_LJSPEECH = _Tacotron2WaveRNNPhoneBundle(
    _tacotron2_path="tacotron2_english_phonemes_1500_epochs_wavernn_ljspeech.pth",
    _tacotron2_params=utils._get_taco_params(n_symbols=96),
    _wavernn_path="wavernn_10k_epochs_8bits_ljspeech.pth",
    _wavernn_params=utils._get_wrnn_params(),
)
TACOTRON2_WAVERNN_PHONE_LJSPEECH.__doc__ = """Phoneme-based TTS pipeline with :py:class:`~torchaudio.models.Tacotron2` trained on *LJSpeech* :cite:`ljspeech17` for 1,500 epochs, and
:py:class:`~torchaudio.models.WaveRNN` vocoder trained on 8-bit depth waveform of *LJSpeech* :cite:`ljspeech17` for 10,000 epochs.

The text processor encodes the input texts based on phonemes.
It uses `DeepPhonemizer <https://github.com/as-ideas/DeepPhonemizer>`__ to convert
graphemes to phonemes.
The model (*en_us_cmudict_forward*) was trained on
`CMUDict <http://www.speech.cs.cmu.edu/cgi-bin/cmudict>`__.

You can find the training script for Tacotron2 `here <https://github.com/pytorch/audio/tree/main/examples/pipeline_tacotron2>`__.
The following parameters were used: ``win_length=1100``, ``hop_length=275``, ``n_fft=2048``,
``mel_fmin=40``, and ``mel_fmax=11025``.

You can find the training script for WaveRNN `here <https://github.com/pytorch/audio/tree/main/examples/pipeline_wavernn>`__.

Please refer to :func:`torchaudio.pipelines.Tacotron2TTSBundle` for the usage.
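
A minimal usage sketch (illustrative; the phoneme text processor downloads the DeepPhonemizer checkpoint
on first use, and ``get_tacotron2`` and ``get_vocoder`` download the pretrained weights):

.. code-block:: python

   import torchaudio

   bundle = torchaudio.pipelines.TACOTRON2_WAVERNN_PHONE_LJSPEECH
   processor = bundle.get_text_processor()  # grapheme -> phoneme encoder
   tacotron2 = bundle.get_tacotron2()
   vocoder = bundle.get_vocoder()           # WaveRNN-based vocoder

   tokens, lengths = processor("Hello world! T T S stands for Text to Speech!")
   spec, spec_lengths, _ = tacotron2.infer(tokens, lengths)
   waveforms, _ = vocoder(spec, spec_lengths)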

Example - "Hello world! T T S stands for Text to Speech!"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_PHONE_LJSPEECH.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_PHONE_LJSPEECH.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>


Example - "The examination and testimony of the experts enabled the Commission to conclude that five shots may have been fired,"

   .. image:: https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_PHONE_LJSPEECH_v2.png
      :alt: Spectrogram generated by Tacotron2

   .. raw:: html

      <audio controls="controls">
         <source src="https://download.pytorch.org/torchaudio/doc-assets/TACOTRON2_WAVERNN_PHONE_LJSPEECH_v2.wav" type="audio/wav">
         Your browser does not support the <code>audio</code> element.
      </audio>
"""