o
    ZhI                     @   s  U d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZ ddlmZmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZm Z m!Z!m"Z" e#e$Z%er|e Z&ee'ee
e' e
e' f f e(d< neg dZ&e&) D ]\Z*Z+e+Z,e sdZ,e,e&e*< qeee&Z-de'fddZ.							d#dee'ej/f de
ee'ej/f  de0de
e0 de
e	e'e'f  de
ee0e'f  de
e' de0fddZ1eddG d d! d!Z2d"d!gZ3dS )$zAutoVideoProcessor class.    N)OrderedDict)TYPE_CHECKINGDictOptionalTupleUnion   )PretrainedConfig)get_class_from_dynamic_moduleresolve_trust_remote_code)CONFIG_NAMEVIDEO_PROCESSOR_NAMEcached_fileis_torchvision_availablelogging)requires)BaseVideoProcessor   )_LazyAutoMapping)CONFIG_MAPPING_NAMES
AutoConfigmodel_type_to_module_name!replace_list_option_in_docstringsVIDEO_PROCESSOR_MAPPING_NAMES)
)ZinstructblipInstructBlipVideoVideoProcessor)Zinstructblipvideor   )ZinternvlZInternVLVideoProcessor)Zllava_next_videoZLlavaNextVideoVideoProcessor)Zllava_onevisionZLlavaOnevisionVideoProcessor)Zqwen2_5_omniQwen2VLVideoProcessor)Z
qwen2_5_vlr   )Zqwen2_vlr   )ZsmolvlmZSmolVLMVideoProcessor)Zvideo_llavaZVideoLlavaVideoProcessor
class_namec              	   C   s   t  D ]'\}}| |v r+t|}td| d}zt|| W   S  ty*   Y qw qtj D ]\}}t|dd | krA|  S q1td}t	|| rQt|| S d S )N.ztransformers.models__name__Ztransformers)
r   itemsr   	importlibimport_modulegetattrAttributeErrorVIDEO_PROCESSOR_MAPPINGZ_extra_contenthasattr)r   module_nameZ
extractorsmodule_Z	extractorZmain_module r)   ]/var/www/auris/lib/python3.10/site-packages/transformers/models/auto/video_processing_auto.pyvideo_processor_class_from_nameJ   s$   	


r+   Fpretrained_model_name_or_path	cache_dirforce_downloadresume_downloadproxiestokenrevisionlocal_files_onlyc                 K   s   | dd}	|	durtdt |durtd|	}t| t|||||||d	}
|
du r2td i S t	|
dd}t
|W  d   S 1 sHw   Y  dS )	a  
    Loads the video processor configuration from a pretrained model video processor configuration.

    Args:
        pretrained_model_name_or_path (`str` or `os.PathLike`):
            This can be either:

            - a string, the *model id* of a pretrained model configuration hosted inside a model repo on
              huggingface.co.
            - a path to a *directory* containing a configuration file saved using the
              [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.

        cache_dir (`str` or `os.PathLike`, *optional*):
            Path to a directory in which a downloaded pretrained model configuration should be cached if the standard
            cache should not be used.
        force_download (`bool`, *optional*, defaults to `False`):
            Whether or not to force to (re-)download the configuration files and override the cached versions if they
            exist.
        resume_download:
            Deprecated and ignored. All downloads are now resumed by default when possible.
            Will be removed in v5 of Transformers.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
            'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
        token (`str` or *bool*, *optional*):
            The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
            when running `huggingface-cli login` (stored in `~/.huggingface`).
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
            git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
            identifier allowed by git.
        local_files_only (`bool`, *optional*, defaults to `False`):
            If `True`, will only try to load the video processor configuration from local files.

    <Tip>

    Passing `token=True` is required when you want to use a private model.

    </Tip>

    Returns:
        `Dict`: The configuration of the video processor.

    Examples:

    ```python
    # Download configuration from huggingface.co and cache.
    video_processor_config = get_video_processor_config("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    # This model does not have a video processor config so the result will be an empty dict.
    video_processor_config = get_video_processor_config("FacebookAI/xlm-roberta-base")

    # Save a pretrained video processor locally and you can reload its config
    from transformers import AutoVideoProcessor

    video_processor = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")
    video_processor.save_pretrained("video-processor-test")
    video_processor = get_video_processor_config("video-processor-test")
    ```use_auth_tokenNrThe `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.V`token` and `use_auth_token` are both specified. Please set only the argument `token`.)r-   r.   r/   r0   r1   r2   r3   zbCould not locate the video processor configuration file, will try to use the model config instead.zutf-8)encoding)popwarningswarnFutureWarning
ValueErrorr   r   loggerinfoopenjsonload)r,   r-   r.   r/   r0   r1   r2   r3   kwargsr4   Zresolved_config_filereaderr)   r)   r*   get_video_processor_configb   s8   E$rD   )ZvisionZtorchvision)backendsc                   @   s<   e Zd ZdZdd Zeeedd Ze		d
ddZ
d	S )AutoVideoProcessora%  
    This is a generic video processor class that will be instantiated as one of the video processor classes of the
    library when created with the [`AutoVideoProcessor.from_pretrained`] class method.

    This class cannot be instantiated directly using `__init__()` (throws an error).
    c                 C   s   t d)NzAutoVideoProcessor is designed to be instantiated using the `AutoVideoProcessor.from_pretrained(pretrained_model_name_or_path)` method.)EnvironmentError)selfr)   r)   r*   __init__   s   zAutoVideoProcessor.__init__c                 O   sv  | dd}|dur tdt |dddurtd||d< | dd}| dd}d|d	< tj|fi |\}}|d
d}	d}
d|di v rQ|d d }
|	du r}|
du r}| dd}|duri|dd}	d|di v r}|d d }|dd}
|	du r|
du rt	|t
stj|fd|i|}t|d
d}	t|drd|jv r|jd }
|	durt|	}	|
du}|	dupt|tv }t||||}|r|r|
}t||fi |}	| dd}tj|r|	  |	j|fi |S |	dur|	j|fi |S t|tv rtt| }	|	dur|	j|g|R i |S tdtd| dt dt dt dddd t D  
)ac  
        Instantiate one of the video processor classes of the library from a pretrained model vocabulary.

        The video processor class to instantiate is selected based on the `model_type` property of the config object
        (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible), or when it's
        missing, by falling back to using pattern matching on `pretrained_model_name_or_path`:

        List options

        Params:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                This can be either:

                - a string, the *model id* of a pretrained video_processor hosted inside a model repo on
                  huggingface.co.
                - a path to a *directory* containing a video processor file saved using the
                  [`~video_processing_utils.BaseVideoProcessor.save_pretrained`] method, e.g.,
                  `./my_model_directory/`.
                - a path or url to a saved video processor JSON *file*, e.g.,
                  `./my_model_directory/preprocessor_config.json`.
            cache_dir (`str` or `os.PathLike`, *optional*):
                Path to a directory in which a downloaded pretrained model video processor should be cached if the
                standard cache should not be used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force to (re-)download the video processor files and override the cached versions if
                they exist.
            resume_download:
                Deprecated and ignored. All downloads are now resumed by default when possible.
                Will be removed in v5 of Transformers.
            proxies (`Dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}.` The proxies are used on each request.
            token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
                when running `huggingface-cli login` (stored in `~/.huggingface`).
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
                git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
                identifier allowed by git.
            return_unused_kwargs (`bool`, *optional*, defaults to `False`):
                If `False`, then this function returns just the final video processor object. If `True`, then this
                functions returns a `Tuple(video_processor, unused_kwargs)` where *unused_kwargs* is a dictionary
                consisting of the key/value pairs whose keys are not video processor attributes: i.e., the part of
                `kwargs` which has not been used to update `video_processor` and is otherwise ignored.
            trust_remote_code (`bool`, *optional*, defaults to `False`):
                Whether or not to allow for custom models defined on the Hub in their own modeling files. This option
                should only be set to `True` for repositories you trust and in which you have read the code, as it will
                execute code present on the Hub on your local machine.
            kwargs (`Dict[str, Any]`, *optional*):
                The values in kwargs of any keys which are video processor attributes will be used to override the
                loaded values. Behavior concerning key/value pairs whose keys are *not* video processor attributes is
                controlled by the `return_unused_kwargs` keyword parameter.

        <Tip>

        Passing `token=True` is required when you want to use a private model.

        </Tip>

        Examples:

        ```python
        >>> from transformers import AutoVideoProcessor

        >>> # Download video processor from huggingface.co and cache.
        >>> video_processor = AutoVideoProcessor.from_pretrained("llava-hf/llava-onevision-qwen2-0.5b-ov-hf")

        >>> # If video processor files are in a directory (e.g. video processor was saved using *save_pretrained('./test/saved_model/')*)
        >>> # video_processor = AutoVideoProcessor.from_pretrained("./test/saved_model/")
        ```r4   Nr5   r1   r6   configtrust_remote_codeTZ
_from_autoZvideo_processor_typerF   auto_mapZfeature_extractor_typeZFeatureExtractorZVideoProcessorZAutoFeatureExtractorZcode_revisionz_This video processor cannot be instantiated. Please make sure you have `torchvision` installed.z Unrecognized video processor in z2. Should have a `video_processor_type` key in its z of z3, or one of the following `model_type` keys in its z: z, c                 s   s    | ]}|V  qd S )Nr)   ).0cr)   r)   r*   	<genexpr>i  s    z5AutoVideoProcessor.from_pretrained.<locals>.<genexpr>)r8   r9   r:   r;   getr<   r   Zget_video_processor_dictreplace
isinstancer	   r   from_pretrainedr"   r%   rL   r+   typer$   r   r
   ospathisdirZregister_for_auto_class	from_dictr   r   joinr   keys)clsr,   ZinputsrB   r4   rJ   rK   Zconfig_dictr(   video_processor_classZvideo_processor_auto_mapZfeature_extractor_classZfeature_extractor_auto_mapZhas_remote_codeZhas_local_codeZ	class_refr)   r)   r*   rS      s   I


z"AutoVideoProcessor.from_pretrainedFc                 C   s   t j| ||d dS )a7  
        Register a new video processor for this class.

        Args:
            config_class ([`PretrainedConfig`]):
                The configuration corresponding to the model to register.
            video_processor_class ([`BaseVideoProcessor`]):
                The video processor to register.
        )exist_okN)r$   register)Zconfig_classr\   r]   r)   r)   r*   r^   l  s   zAutoVideoProcessor.registerN)F)r   
__module____qualname____doc__rI   classmethodr   r   rS   staticmethodr^   r)   r)   r)   r*   rF      s     rF   r$   )NFNNNNF)4ra   r    r@   rU   r9   collectionsr   typingr   r   r   r   r   Zconfiguration_utilsr	   Zdynamic_module_utilsr
   r   utilsr   r   r   r   r   Zutils.import_utilsr   Zvideo_processing_utilsr   Zauto_factoryr   Zconfiguration_autor   r   r   r   Z
get_loggerr   r=   r   str__annotations__r   Z
model_typeZvideo_processorsZfast_video_processor_classr$   r+   PathLikeboolrD   rF   __all__r)   r)   r)   r*   <module>   sn   
(


d 8