o
    Zhv                     @   s,  d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZ ddlZddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# erae raddl$Z$e"%e&Z'ed Z(G d	d
 d
eZ)G dd deZ*ee*j+e*_+e*j+j dure*j+j j,dddde*j+_ dS dS )zH
Feature extraction saving/loading class for common feature extractors.
    N)UserDict)TYPE_CHECKINGAnyOptionalUnion   )custom_object_save)FEATURE_EXTRACTOR_NAMEPushToHubMixin
TensorTypeadd_model_info_to_auto_map"add_model_info_to_custom_pipelinescached_file	copy_funcdownload_urlis_flax_availableis_jax_tensoris_numpy_arrayis_offline_modeis_remote_urlis_tf_availableis_torch_availableis_torch_deviceis_torch_dtypeloggingrequires_backendsZSequenceFeatureExtractorc                       s   e Zd ZdZddeeeef  dedee	f f fddZ
dedee fd	d
ZdefddZdd Zdd Zdd Zdd Zdd Zddeeee	f  fddZddeeee	f  fddZdddZ  ZS ) BatchFeatureaU  
    Holds the output of the [`~SequenceFeatureExtractor.pad`] and feature extractor specific `__call__` methods.

    This class is derived from a python dictionary and can be used as a dictionary.

    Args:
        data (`dict`, *optional*):
            Dictionary of lists/arrays/tensors returned by the __call__/pad methods ('input_values', 'attention_mask',
            etc.).
        tensor_type (`Union[None, str, TensorType]`, *optional*):
            You can give a tensor_type here to convert the lists of integers in PyTorch/TensorFlow/Numpy Tensors at
            initialization.
    Ndatatensor_typec                    s   t  | | j|d d S )N)r   )super__init__convert_to_tensors)selfr   r   	__class__ T/var/www/auris/lib/python3.10/site-packages/transformers/feature_extraction_utils.pyr    L   s   zBatchFeature.__init__itemreturnc                 C   s   t |tr
| j| S td)z
        If the key is a string, returns the value of the dict associated to `key` ('input_values', 'attention_mask',
        etc.).
        zRIndexing with integers is not available when using Python based feature extractors)
isinstancestrr   KeyErrorr"   r'   r%   r%   r&   __getitem__P   s   

zBatchFeature.__getitem__c                 C   s    z| j | W S  ty   tw N)r   r+   AttributeErrorr,   r%   r%   r&   __getattr__Z   s
   zBatchFeature.__getattr__c                 C   s
   d| j iS Nr   r   r"   r%   r%   r&   __getstate__`      
zBatchFeature.__getstate__c                 C   s   d|v r|d | _ d S d S r1   r2   )r"   stater%   r%   r&   __setstate__c   s   zBatchFeature.__setstate__c                 C   
   | j  S r.   )r   keysr3   r%   r%   r&   r9   h   r5   zBatchFeature.keysc                 C   r8   r.   )r   valuesr3   r%   r%   r&   r:   l   r5   zBatchFeature.valuesc                 C   r8   r.   )r   itemsr3   r%   r%   r&   r;   p   r5   zBatchFeature.itemsc                    s   |d u rdS t |tst|}|tjkr)t stddd l}|j |j}| fS |tjkrFt	 s5tddd l
fdd j}| fS |tjkrat sRtddd lm} |j t}| fS d	 fdd	 t}| fS )
NNNzSUnable to convert output to TensorFlow tensors format, TensorFlow is not installed.r   zMUnable to convert output to PyTorch tensors format, PyTorch is not installed.c                    s   t | ttfr;t| dkr;t | d tjrt| } n t | d ttfr;t| d dkr;t | d d tjr;t| } t | tjrF | S  | S )Nr   )	r)   listtuplelennpndarrayarrayZ
from_numpytensor)value)torchr%   r&   	as_tensor   s   


z5BatchFeature._get_is_as_tensor_fns.<locals>.as_tensorzEUnable to convert output to JAX tensors format, JAX is not installed.c                    sn   t | ttfr0t | d tttjfr0dd | D }tt|dkr0|d u r0 dd | D td} tj| |dS )Nr   c                 S   s   g | ]}t |qS r%   )r?   .0valr%   r%   r&   
<listcomp>   s    zIBatchFeature._get_is_as_tensor_fns.<locals>.as_tensor.<locals>.<listcomp>r   c                 S   s   g | ]}t |qS r%   )r@   asarrayrG   r%   r%   r&   rJ      s    )dtype)	r)   r=   r>   r@   rA   r?   setobjectrK   )rD   rL   Z
value_lens)rF   r%   r&   rF      s
   $r.   )r)   r   Z
TENSORFLOWr   ImportErrorZ
tensorflowZconstant	is_tensorZPYTORCHr   rE   ZJAXr   Z	jax.numpynumpyrB   r   r   )r"   r   tfrP   Zjnpr%   )rF   rE   r&   _get_is_as_tensor_fnss   s<   

(

z"BatchFeature._get_is_as_tensor_fnsc                 C   sj   |du r| S |  |\}}|  D ]!\}}z||s"||}|| |< W q   |dkr/tdtd| S )a5  
        Convert the inner content to tensors.

        Args:
            tensor_type (`str` or [`~utils.TensorType`], *optional*):
                The type of tensors to use. If `str`, should be one of the values of the enum [`~utils.TensorType`]. If
                `None`, no modification is done.
        NZoverflowing_valueszKUnable to create tensor returning overflowing values of different lengths. zUnable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.)rS   r;   
ValueError)r"   r   rP   rF   keyrD   rC   r%   r%   r&   r!      s    	zBatchFeature.convert_to_tensorsc           
      O   s  t | dg ddl}i }|d}|dd}|du rEt|dkrE|d }t|r*nt|ts8t|s8t|tr;|}n
t	dt| d| 
 D ]2\}}	t|	|jrc||	rc|	j|i |||< qIt|	|jrw|durw|	j||d	||< qI|	||< qI|| _| S )
a  
        Send all values to device by calling `v.to(*args, **kwargs)` (PyTorch only). This should support casting in
        different `dtypes` and sending the `BatchFeature` to a different `device`.

        Args:
            args (`Tuple`):
                Will be passed to the `to(...)` function of the tensors.
            kwargs (`Dict`, *optional*):
                Will be passed to the `to(...)` function of the tensors.
                To enable asynchronous data transfer, set the `non_blocking` flag in `kwargs` (defaults to `False`).

        Returns:
            [`BatchFeature`]: The same instance after modification.
        rE   r   Ndevicenon_blockingFz*Attempting to cast a BatchFeature to type z. This is not supported.)rV   rW   )r   rE   getr?   r   r)   r*   r   intrT   r;   ZTensorZis_floating_pointtor   )
r"   argskwargsrE   Znew_datarV   rW   argkvr%   r%   r&   rZ      s(   

zBatchFeature.tor<   r.   )r(   r   )__name__
__module____qualname____doc__r   dictr*   r   r   r   r    r-   r0   r4   r7   r9   r:   r;   rS   r!   rZ   __classcell__r%   r%   r#   r&   r   =   s    .
;r   c                   @   sf  e Zd ZdZdZdd ZdefddZe						d+d
e	ee
jf dee	ee
jf  dededee	eef  defddZd,de	ee
jf defddZed
e	ee
jf deeeef eeef f fddZedeeef defddZdeeef fddZede	ee
jf defdd Zdefd!d"Zd#e	ee
jf fd$d%Zd&d' Zed-d)d*ZdS ).FeatureExtractionMixinz
    This is a feature extraction mixin used to provide saving/loading functionality for sequential and image feature
    extractors.
    Nc                 K   sl   | dd| _| D ](\}}zt| || W q ty3 } ztd| d| d|   |d}~ww dS )z'Set elements of `kwargs` as attributes.processor_classNz
Can't set z with value z for )pop_processor_classr;   setattrr/   loggererror)r"   r\   rU   rD   errr%   r%   r&   r      s   zFeatureExtractionMixin.__init__rg   c                 C   s
   || _ dS )z%Sets processor class as an attribute.N)ri   )r"   rg   r%   r%   r&   _set_processor_class  s   
z+FeatureExtractionMixin._set_processor_classFmainpretrained_model_name_or_path	cache_dirforce_downloadlocal_files_onlytokenrevisionc           
      K   s   ||d< ||d< ||d< ||d< | dd}|dur*tdt |dur(td|}|dur2||d	< | j|fi |\}	}| j|	fi |S )
a  
        Instantiate a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a feature extractor, *e.g.* a
        derived class of [`SequenceFeatureExtractor`].

        Args:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained feature_extractor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a feature extractor file saved using the
                  [`~feature_extraction_utils.FeatureExtractionMixin.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved feature extractor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model feature extractor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the feature extractor files and override the cached versions
                if they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or `bool`, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, or not specified, will use
                the token generated when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.


                <Tip>

                To test a pull request you made on the Hub, you can pass `revision="refs/pr/<pr_number>"`.

                </Tip>

            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final feature extractor object. If `True`, then this
                functions returns a `Tuple(feature_extractor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not feature extractor attributes: i.e., the part of
                `kwargs` which has not been used to update `feature_extractor` and is otherwise ignored.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are feature extractor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* feature extractor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        Returns:
            A feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`].

        Examples:

        ```python
        # We can't instantiate directly the base class *FeatureExtractionMixin* nor *SequenceFeatureExtractor* so let's show the examples on a
        # derived class: *Wav2Vec2FeatureExtractor*
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h"
        )  # Download feature_extraction_config from huggingface.co and cache.
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "./test/saved_model/"
        )  # E.g. feature_extractor (or model) was saved using *save_pretrained('./test/saved_model/')*
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("./test/saved_model/preprocessor_config.json")
        feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h", return_attention_mask=False, foo=False
        )
        assert feature_extractor.return_attention_mask is False
        feature_extractor, unused_kwargs = Wav2Vec2FeatureExtractor.from_pretrained(
            "facebook/wav2vec2-base-960h", return_attention_mask=False, foo=False, return_unused_kwargs=True
        )
        assert feature_extractor.return_attention_mask is False
        assert unused_kwargs == {"foo": False}
        ```rq   rr   rs   ru   use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`.rt   )rh   warningswarnFutureWarningrT   get_feature_extractor_dict	from_dict)
clsrp   rq   rr   rs   rt   ru   r\   rv   feature_extractor_dictr%   r%   r&   from_pretrained  s&   Xz&FeatureExtractionMixin.from_pretrainedsave_directorypush_to_hubc           	      K   s  | dd}|dur tdt |dddurtd||d< tj|r.t	d| dtj
|dd	 |rX| d
d}| d|tjjd }| j|fi |}| |}| jdurdt| || d tj|t}| | td|  |r| j|||||dd |gS )az  
        Save a feature_extractor object to the directory `save_directory`, so that it can be re-loaded using the
        [`~feature_extraction_utils.FeatureExtractionMixin.from_pretrained`] class method.

        Args:
            save_directory (`str` or `os.PathLike`):
                Directory where the feature extractor JSON file will be saved (will be created if it does not exist).
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`Dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        rv   Nrw   rt   rx   zProvided path (z#) should be a directory, not a fileT)exist_okcommit_messagerepo_id)configzFeature extractor saved in )r   rt   )rh   ry   rz   r{   rX   rT   ospathisfileAssertionErrormakedirssplitsepZ_create_repoZ_get_files_timestamps_auto_classr   joinr	   to_json_filerk   infoZ_upload_modified_files)	r"   r   r   r\   rv   r   r   Zfiles_timestampsZoutput_feature_extractor_filer%   r%   r&   save_pretrained  sB   


z&FeatureExtractionMixin.save_pretrainedr(   c                 K   sp  | dd}| dd}| dd}| dd}| dd}| dd}| d	d}	| d
d}
| dd}|	durJtdt |durHtd|	}| dd}| dd}d|d}|durc||d< t ro|
sotd d}
t|}t	j
|}t	j
|rt	j
|t}t	j
|r|}d}n;t|r|}t|}n0t}zt|||||||
||||d}W n ty     ty   td| d| dt dw z!t|dd}| }W d   n1 sw   Y  t|}W n tjy   td| dw |r
td|  ntd| d|  |s4d |v r&t|d  ||d < d!|v r4t|d! ||d!< ||fS )"a6  
        From a `pretrained_model_name_or_path`, resolve to a dictionary of parameters, to be used for instantiating a
        feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] using `from_dict`.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                The identifier of the pre-trained checkpoint from which we want the dictionary of parameters.

        Returns:
            `Tuple[Dict, Dict]`: The dictionary(ies) that will be used to instantiate the feature extractor object.
        rq   Nrr   Fresume_downloadproxies	subfolderrt   rv   rs   ru   rw   rx   Z_from_pipelineZ
_from_autofeature extractor)	file_typefrom_auto_classZusing_pipelinez+Offline mode: forcing local_files_only=TrueT)	rq   rr   r   r   rs   r   rt   
user_agentru   z"Can't load feature extractor for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z2' is the correct path to a directory containing a z fileutf-8encodingz"It looks like the config file at 'z' is not a valid JSON file.zloading configuration file z from cache at Zauto_mapZcustom_pipelines)rh   ry   rz   r{   rT   r   rk   r   r*   r   r   isdirr   r	   r   r   r   r   OSError	ExceptionopenreadjsonloadsJSONDecodeErrorr   r   )r~   rp   r\   rq   rr   r   r   r   rt   rv   rs   ru   Zfrom_pipeliner   r   is_localZfeature_extractor_fileZresolved_feature_extractor_filereadertextr   r%   r%   r&   r|     s   




	



z1FeatureExtractionMixin.get_feature_extractor_dictr   c                 K   s   | dd}g }| D ]\}}||v r|||< || q|D ]}| |d q | di |}td|  |r>||fS |S )a	  
        Instantiates a type of [`~feature_extraction_utils.FeatureExtractionMixin`] from a Python dictionary of
        parameters.

        Args:
            feature_extractor_dict (`Dict[str, Any]`):
                Dictionary that will be used to instantiate the feature extractor object. Such a dictionary can be
                retrieved from a pretrained checkpoint by leveraging the
                [`~feature_extraction_utils.FeatureExtractionMixin.to_dict`] method.
            kwargs (`Dict[str, Any]`):
                Additional parameters from which to initialize the feature extractor object.

        Returns:
            [`~feature_extraction_utils.FeatureExtractionMixin`]: The feature extractor object instantiated from those
            parameters.
        return_unused_kwargsFNzFeature extractor r%   )rh   r;   appendrk   r   )r~   r   r\   r   	to_removerU   rD   Zfeature_extractorr%   r%   r&   r}   6  s   
z FeatureExtractionMixin.from_dictc                 C   s8   t | j}| jj|d< d|v r|d= d|v r|d= |S )z
        Serializes this instance to a Python dictionary. Returns:
            `Dict[str, Any]`: Dictionary of all the attributes that make up this configuration instance.
        Zfeature_extractor_typeZmel_filtersZwindow)copydeepcopy__dict__r$   r`   )r"   outputr%   r%   r&   to_dict[  s   zFeatureExtractionMixin.to_dict	json_filec                 C   sL   t |dd}| }W d   n1 sw   Y  t|}| di |S )a  
        Instantiates a feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`] from the path to
        a JSON file of parameters.

        Args:
            json_file (`str` or `os.PathLike`):
                Path to the JSON file containing the parameters.

        Returns:
            A feature extractor of type [`~feature_extraction_utils.FeatureExtractionMixin`]: The feature_extractor
            object instantiated from that JSON file.
        r   r   Nr%   )r   r   r   r   )r~   r   r   r   r   r%   r%   r&   from_json_fileh  s
   

z%FeatureExtractionMixin.from_json_filec                 C   sb   |   }| D ]\}}t|tjr| ||< q|dd}|dur'||d< tj|dddd S )z
        Serializes this instance to a JSON string.

        Returns:
            `str`: String containing all the attributes that make up this feature_extractor instance in JSON format.
        ri   Nrg      T)indent	sort_keys
)	r   r;   r)   r@   rA   tolistrh   r   dumps)r"   
dictionaryrU   rD   ri   r%   r%   r&   to_json_string{  s   z%FeatureExtractionMixin.to_json_stringjson_file_pathc                 C   sB   t |ddd}||   W d   dS 1 sw   Y  dS )z
        Save this instance to a JSON file.

        Args:
            json_file_path (`str` or `os.PathLike`):
                Path to the JSON file in which this feature_extractor instance's parameters will be saved.
        wr   r   N)r   writer   )r"   r   writerr%   r%   r&   r     s   "z#FeatureExtractionMixin.to_json_filec                 C   s   | j j d|   S )N )r$   r`   r   r3   r%   r%   r&   __repr__  s   zFeatureExtractionMixin.__repr__AutoFeatureExtractorc                 C   sD   t |ts|j}ddlm  m} t||st| d|| _dS )a  
        Register this class with a given auto class. This should only be used for custom feature extractors as the ones
        in the library are already mapped with `AutoFeatureExtractor`.

        <Tip warning={true}>

        This API is experimental and may have some slight breaking changes in the next releases.

        </Tip>

        Args:
            auto_class (`str` or `type`, *optional*, defaults to `"AutoFeatureExtractor"`):
                The auto class to register this new feature extractor with.
        r   Nz is not a valid auto class.)	r)   r*   r`   Ztransformers.models.automodelsautohasattrrT   r   )r~   Z
auto_classZauto_moduler%   r%   r&   register_for_auto_class  s   


z.FeatureExtractionMixin.register_for_auto_class)NFFNro   )F)r   )r`   ra   rb   rc   r   r    r*   rn   classmethodr   r   PathLiker   boolr   r   r>   rd   r   r|   PreTrainedFeatureExtractorr}   r   r   r   r   r   r   r%   r%   r%   r&   rf      sR    o=t$rf   r   r   zfeature extractor file)rN   Zobject_classZobject_files)-rc   r   r   r   ry   collectionsr   typingr   r   r   r   rQ   r@   Zdynamic_module_utilsr   utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rE   Z
get_loggerr`   rk   r   r   rf   r   formatr%   r%   r%   r&   <module>   s6   T
 @   @