o
    ZŽhJ  ã                   @   s>   d Z ddlZddlmZ ddlmZ G dd„ deƒZdgZdS )z$
Speech processor class for M-CTC-T
é    N)Úcontextmanageré   )ÚProcessorMixinc                       sT   e Zd ZdZdZdZ‡ fdd„Zdd„ Zdd	„ Zd
d„ Z	dd„ Z
edd„ ƒZ‡  ZS )ÚMCTCTProcessora[  
    Constructs a MCTCT processor which wraps a MCTCT feature extractor and a MCTCT tokenizer into a single processor.

    [`MCTCTProcessor`] offers all the functionalities of [`MCTCTFeatureExtractor`] and [`AutoTokenizer`]. See the
    [`~MCTCTProcessor.__call__`] and [`~MCTCTProcessor.decode`] for more information.

    Args:
        feature_extractor (`MCTCTFeatureExtractor`):
            An instance of [`MCTCTFeatureExtractor`]. The feature extractor is a required input.
        tokenizer (`AutoTokenizer`):
            An instance of [`AutoTokenizer`]. The tokenizer is a required input.
    ZMCTCTFeatureExtractorZAutoTokenizerc                    s    t ƒ  ||¡ | j| _d| _d S )NF)ÚsuperÚ__init__Úfeature_extractorÚcurrent_processorÚ_in_target_context_manager)Úselfr   Ú	tokenizer©Ú	__class__© úd/var/www/auris/lib/python3.10/site-packages/transformers/models/deprecated/mctct/processing_mctct.pyr   *   s   
zMCTCTProcessor.__init__c                 O   sö   | j r| j|i |¤ŽS d|v rt d¡ | d¡}n| dd¡}| dd¡}| dd¡}t|ƒdkr<|d }|dd… }|du rH|du rHtd	ƒ‚|durZ| j|g|¢R d|i|¤Ž}|durg| j|fi |¤Ž}|du rm|S |du rs|S |d
 |d< |S )a¤  
        When used in normal mode, this method forwards all its arguments to MCTCTFeatureExtractor's
        [`~MCTCTFeatureExtractor.__call__`] and returns its output. If used in the context
        [`~MCTCTProcessor.as_target_processor`] this method forwards all its arguments to AutoTokenizer's
        [`~AutoTokenizer.__call__`]. Please refer to the docstring of the above two methods for more information.
        Z
raw_speechzLUsing `raw_speech` as a keyword argument is deprecated. Use `audio` instead.ÚaudioNÚsampling_rateÚtextr   é   zAYou need to specify either an `audio` or `text` input to process.Ú	input_idsÚlabels)	r
   r	   ÚwarningsÚwarnÚpopÚlenÚ
ValueErrorr   r   )r   ÚargsÚkwargsr   r   r   ZinputsÚ	encodingsr   r   r   Ú__call__/   s.   
zMCTCTProcessor.__call__c                 O   ó   | j j|i |¤ŽS )z½
        This method forwards all its arguments to AutoTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please refer
        to the docstring of this method for more information.
        )r   Úbatch_decode©r   r   r   r   r   r   r!   U   ó   zMCTCTProcessor.batch_decodec                 O   s¶   | j r| jj|i |¤ŽS | dd¡}| dd¡}t|ƒdkr(|d }|dd… }|dur9| jj|g|¢R i |¤Ž}|durG| jj|fi |¤Ž}|du rM|S |du rS|S |d |d< |S )a¦  
        When used in normal mode, this method forwards all its arguments to MCTCTFeatureExtractor's
        [`~MCTCTFeatureExtractor.pad`] and returns its output. If used in the context
        [`~MCTCTProcessor.as_target_processor`] this method forwards all its arguments to PreTrainedTokenizer's
        [`~PreTrainedTokenizer.pad`]. Please refer to the docstring of the above two methods for more information.
        Úinput_featuresNr   r   r   r   )r
   r	   Úpadr   r   r   r   )r   r   r   r$   r   r   r   r   r%   \   s"   zMCTCTProcessor.padc                 O   r    )z·
        This method forwards all its arguments to AutoTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to the
        docstring of this method for more information.
        )r   Údecoder"   r   r   r   r&   z   r#   zMCTCTProcessor.decodec                 c   s2    t  d¡ d| _| j| _dV  | j| _d| _dS )z
        Temporarily sets the tokenizer for processing the input. Useful for encoding the labels when fine-tuning MCTCT.
        zî`as_target_processor` is deprecated and will be removed in v5 of Transformers. You can process your labels by using the argument `text` of the regular `__call__` method (either in the same call as your audio inputs, or in a separate call.TNF)r   r   r
   r   r	   r   )r   r   r   r   Úas_target_processor   s   €ÿ
z"MCTCTProcessor.as_target_processor)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Zfeature_extractor_classZtokenizer_classr   r   r!   r%   r&   r   r'   Ú__classcell__r   r   r   r   r      s    &r   )r+   r   Ú
contextlibr   Zprocessing_utilsr   r   Ú__all__r   r   r   r   Ú<module>   s   
y