import warnings
from collections import UserDict
from typing import List, Union

from ..utils import (
    add_end_docstrings,
    is_tf_available,
    is_torch_available,
    is_vision_available,
    logging,
    requires_backends,
)
from .base import Pipeline, build_pipeline_init_args


if is_vision_available():
    from PIL import Image

    from ..image_utils import load_image

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES

if is_tf_available():
    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
    from ..tf_utils import stable_softmax

logger = logging.get_logger(__name__)


@add_end_docstrings(build_pipeline_init_args(has_image_processor=True))
class ZeroShotImageClassificationPipeline(Pipeline):
    """
    Zero shot image classification pipeline using `CLIPModel`. This pipeline predicts the class of an image when you
    provide an image and a set of `candidate_labels`.

    Example:

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(model="google/siglip-so400m-patch14-384")
    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["animals", "humans", "landscape"],
    ... )
    [{'score': 0.965, 'label': 'animals'}, {'score': 0.03, 'label': 'humans'}, {'score': 0.005, 'label': 'landscape'}]

    >>> classifier(
    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
    ...     candidate_labels=["black and white", "photorealist", "painting"],
    ... )
    [{'score': 0.996, 'label': 'black and white'}, {'score': 0.003, 'label': 'photorealist'}, {'score': 0.0, 'label': 'painting'}]
    ```
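
    The pipeline also accepts a list of images and returns one list of label/score dictionaries per image. A
    minimal sketch (the image paths below are placeholders):

    ```python
    >>> classifier(
    ...     ["path/to/first_image.png", "path/to/second_image.png"],
    ...     candidate_labels=["animals", "humans", "landscape"],
    ... )
    ```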

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image classification pipeline can currently be loaded from [`pipeline`] using the following task identifier:
    `"zero-shot-image-classification"`.
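
    For instance, the pipeline can also be created from the task identifier alone, in which case a default
    checkpoint is selected for you (a minimal sketch; the image path and labels below are placeholders):

    ```python
    >>> from transformers import pipeline

    >>> classifier = pipeline(task="zero-shot-image-classification")
    >>> classifier("path/to/image.png", candidate_labels=["a cat", "a dog"])
    ```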

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?filter=zero-shot-image-classification).
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        requires_backends(self, "vision")
        self.check_model_type(
            TF_MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
            if self.framework == "tf"
            else MODEL_FOR_ZERO_SHOT_IMAGE_CLASSIFICATION_MAPPING_NAMES
        )

    def __call__(self, image: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
        """
        Assign labels to the image(s) passed as inputs.

        Args:
            image (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing an HTTP link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

            candidate_labels (`List[str]`):
                The candidate labels for this image. They will be formatted using *hypothesis_template*.

            hypothesis_template (`str`, *optional*, defaults to `"This is a photo of {}."`):
                The format used in conjunction with *candidate_labels* to attempt the image classification by
                replacing the placeholder with the candidate_labels. Pass "{}" if *candidate_labels* are
                already formatted.

            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list of dictionaries containing one entry per proposed label. Each dictionary contains the
            following keys:
            - **label** (`str`) -- One of the suggested *candidate_labels*.
            - **score** (`float`) -- The score attributed by the model to that label. It is a value between
                0 and 1, computed as the `softmax` of `logits_per_image`.
        """
        if "images" in kwargs:
            image = kwargs.pop("images")
        if image is None:
            raise ValueError("Cannot call the zero-shot-image-classification pipeline without an images argument!")
        return super().__call__(image, **kwargs)

    def _sanitize_parameters(self, tokenizer_kwargs=None, **kwargs):
        preprocess_params = {}
        if "candidate_labels" in kwargs:
            preprocess_params["candidate_labels"] = kwargs["candidate_labels"]
        if "timeout" in kwargs:
            preprocess_params["timeout"] = kwargs["timeout"]
        if "hypothesis_template" in kwargs:
            preprocess_params["hypothesis_template"] = kwargs["hypothesis_template"]
        if tokenizer_kwargs is not None:
            warnings.warn(
                "The `tokenizer_kwargs` argument is deprecated and will be removed in version 5 of Transformers",
                FutureWarning,
            )
            preprocess_params["tokenizer_kwargs"] = tokenizer_kwargs

        return preprocess_params, {}, {}
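
    # `preprocess` formats every candidate label with the hypothesis template and tokenizes the resulting
    # sentences together with the processed image. SigLIP-style checkpoints are tokenized with
    # `padding="max_length"` and `max_length=64` (the padding scheme those checkpoints expect), other models
    # use dynamic padding; user-supplied `tokenizer_kwargs` override these defaults.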

    def preprocess(
        self,
        image,
        candidate_labels=None,
        hypothesis_template="This is a photo of {}.",
        timeout=None,
        tokenizer_kwargs=None,
    ):
        if tokenizer_kwargs is None:
            tokenizer_kwargs = {}
        image = load_image(image, timeout=timeout)
        inputs = self.image_processor(images=[image], return_tensors=self.framework)
        if self.framework == "pt":
            inputs = inputs.to(self.torch_dtype)
        inputs["candidate_labels"] = candidate_labels
        sequences = [hypothesis_template.format(x) for x in candidate_labels]
        tokenizer_default_kwargs = {"padding": True}
        if "siglip" in self.model.config.model_type:
            tokenizer_default_kwargs.update(padding="max_length", max_length=64, truncation=True)
        tokenizer_default_kwargs.update(tokenizer_kwargs)
        text_inputs = self.tokenizer(sequences, return_tensors=self.framework, **tokenizer_default_kwargs)
        inputs["text_inputs"] = [text_inputs]
        return inputs
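
    # `_forward` pops the entries that are not model tensors (the label strings and the pre-tokenized text
    # inputs) out of `model_inputs`, then runs a single forward pass with the text encodings and pixel values.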

    def _forward(self, model_inputs):
        candidate_labels = model_inputs.pop("candidate_labels")
        text_inputs = model_inputs.pop("text_inputs")
        if isinstance(text_inputs[0], UserDict):
            text_inputs = text_inputs[0]
        else:
            # Batching case.
            text_inputs = text_inputs[0][0]

        outputs = self.model(**text_inputs, **model_inputs)

        model_outputs = {
            "candidate_labels": candidate_labels,
            "logits": outputs.logits_per_image,
        }
        return model_outputs
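
    # `postprocess` turns `logits_per_image` into per-label scores: SigLIP-like models (trained with a sigmoid
    # loss) get an independent sigmoid score per label, other PyTorch models use a softmax over the candidate
    # labels, and TensorFlow models go through `stable_softmax`. Results are sorted by decreasing score.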

    def postprocess(self, model_outputs):
        candidate_labels = model_outputs.pop("candidate_labels")
        logits = model_outputs["logits"][0]
        if self.framework == "pt" and "siglip" in self.model.config.model_type:
            probs = torch.sigmoid(logits).squeeze(-1)
            scores = probs.tolist()
            if not isinstance(scores, list):
                scores = [scores]
        elif self.framework == "pt":
            probs = logits.softmax(dim=-1).squeeze(-1)
            scores = probs.tolist()
            if not isinstance(scores, list):
                scores = [scores]
        elif self.framework == "tf":
            probs = stable_softmax(logits, axis=-1)
            scores = probs.numpy().tolist()
        else:
            raise ValueError(f"Unsupported framework: {self.framework}")

        result = [
            {"score": score, "label": candidate_label}
            for score, candidate_label in sorted(zip(scores, candidate_labels), key=lambda x: -x[0])
        ]
        return result