
    fTh                     0    S r SSKJr   " S S\5      rS/rg)z$Speech processor class for SpeechT5.   )ProcessorMixinc                   L   ^  \ rS rSrSrSrSrU 4S jrS rS r	S r
S	 rS
rU =r$ )SpeechT5Processor   aY  
Constructs a SpeechT5 processor which wraps a feature extractor and a tokenizer into a single processor.

[`SpeechT5Processor`] offers all the functionalities of [`SpeechT5FeatureExtractor`] and [`SpeechT5Tokenizer`]. See
the docstring of [`~SpeechT5Processor.__call__`] and [`~SpeechT5Processor.decode`] for more information.

Args:
    feature_extractor (`SpeechT5FeatureExtractor`):
        An instance of [`SpeechT5FeatureExtractor`]. The feature extractor is a required input.
    tokenizer (`SpeechT5Tokenizer`):
        An instance of [`SpeechT5Tokenizer`]. The tokenizer is a required input.
SpeechT5FeatureExtractorSpeechT5Tokenizerc                 $   > [         TU ]  X5        g )N)super__init__)selffeature_extractor	tokenizer	__class__s      h/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/speecht5/processing_speecht5.pyr   SpeechT5Processor.__init__%   s    *6    c                 V   UR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUR                  SS5      nUb  Ub  [        S5      eUb  Ub  [        S5      eUc  Uc  Uc  Uc  [        S	5      eUb  U R                  " U/UQ7SU0UD6nOUb  U R                  " U40 UD6nOSnUb  U R                  " XUS
.UD6n	U	S   n
OUb  U R                  " U40 UD6n	U	S   n
OSn	Uc  U	$ U	b  W
US'   U	R	                  S5      nUb  XS'   U$ )a  
Processes audio and text input, as well as audio and text targets.

You can process audio by using the argument `audio`, or process audio targets by using the argument
`audio_target`. This forwards the arguments to SpeechT5FeatureExtractor's
[`~SpeechT5FeatureExtractor.__call__`].

You can process text by using the argument `text`, or process text labels by using the argument `text_target`.
This forwards the arguments to SpeechT5Tokenizer's [`~SpeechT5Tokenizer.__call__`].

Valid input combinations are:

- `text` only
- `audio` only
- `text_target` only
- `audio_target` only
- `text` and `audio_target`
- `audio` and `audio_target`
- `text` and `text_target`
- `audio` and `text_target`

Please refer to the docstring of the above two methods for more information.
audioNtexttext_targetaudio_targetsampling_ratez\Cannot process both `audio` and `text` inputs. Did you mean `audio_target` or `text_target`?z\Cannot process both `audio_target` and `text_target` inputs. Did you mean `audio` or `text`?zaYou need to specify either an `audio`, `audio_target`, `text`, or `text_target` input to process.)r   r   input_values	input_idslabelsattention_maskdecoder_attention_mask)pop
ValueErrorr   r   get)r   argskwargsr   r   r   r   r   inputstargetsr   r   s               r   __call__SpeechT5Processor.__call__(   s   0 

7D)zz&$'jj5zz.$7

?D9!1n  #(?n  =\1dl{GZs  ++E`D``Y_`F^^D3F3FF#,,]juntuG^,F$nn[;F;G[)FG>N%F8%,[[1A%B"%13I/0r   c                    UR                  SS5      nUR                  SS5      nUR                  SS5      nUb  Ub  [        S5      eUc  Uc  Uc  [        S5      eUb!  U R                  R                  " U/UQ70 UD6nO#Ub  U R                  R                  " U40 UD6nOSnUb  SU;   d  [        U[        5      (       a,  SUS   ;   a#  U R                  R                  " U40 UD6nUS   nOsU R                  R                  nU R                  R                  U R                  l        U R                  R                  " U/UQ70 UD6nXR                  l        US   nOSnUc  U$ Ub  XVS'   UR                  S5      n	U	b  XS	'   U$ )
a  
Collates the audio and text inputs, as well as their targets, into a padded batch.

Audio inputs are padded by SpeechT5FeatureExtractor's [`~SpeechT5FeatureExtractor.pad`]. Text inputs are padded
by SpeechT5Tokenizer's [`~SpeechT5Tokenizer.pad`].

Valid input combinations are:

- `input_ids` only
- `input_values` only
- `labels` only, either log-mel spectrograms or text tokens
- `input_ids` and log-mel spectrogram `labels`
- `input_values` and text `labels`

Please refer to the docstring of the above two methods for more information.
r   Nr   r   z:Cannot process both `input_values` and `input_ids` inputs.zZYou need to specify either an `input_values`, `input_ids`, or `labels` input to be padded.    r   r   )
r   r   r   padr   
isinstancelistfeature_sizenum_mel_binsr    )
r   r!   r"   r   r   r   r#   r$   feature_size_hackr   s
             r   r)   SpeechT5Processor.pado   s   " zz.$7JJ{D1	Hd+#	(=YZZI$5&.l  #++//NtNvNF"^^''	<V<FFf$FD)A)AkU[\]U^F^..,,V>v> -$($:$:$G$G!6:6L6L6Y6Y&&30044VMdMfM6G&&3 0G>N%8%,[[1A%B"%13I/0r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to SpeechT5Tokenizer's [`~SpeechT5Tokenizer.batch_decode`]. Please refer
to the docstring of this method for more information.
)r   batch_decoder   r!   r"   s      r   r1   SpeechT5Processor.batch_decode   s    
 ~~**D;F;;r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to SpeechT5Tokenizer's [`~SpeechT5Tokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r   decoder2   s      r   r5   SpeechT5Processor.decode   s    
 ~~$$d5f55r    )__name__
__module____qualname____firstlineno____doc__feature_extractor_classtokenizer_classr   r%   r)   r1   r5   __static_attributes____classcell__)r   s   @r   r   r      s6     9)O7EN:x<6 6r   r   N)r<   processing_utilsr   r   __all__r7   r   r   <module>rC      s&    + .c6 c6L 
r   