from typing import List, Union

from ..generation import GenerationConfig
from ..utils import (
    add_end_docstrings,
    is_tf_available,
    is_torch_available,
    is_vision_available,
    logging,
    requires_backends,
)
from .base import Pipeline, build_pipeline_init_args


if is_vision_available():
    from PIL import Image

    from ..image_utils import load_image

if is_tf_available():
    from ..models.auto.modeling_tf_auto import TF_MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES

if is_torch_available():
    import torch

    from ..models.auto.modeling_auto import MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES

logger = logging.get_logger(__name__)


@add_end_docstrings(build_pipeline_init_args(has_tokenizer=True, has_image_processor=True))
class ImageToTextPipeline(Pipeline):
    """
    Image To Text pipeline using an `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.

    Unless the model you're using explicitly sets these generation parameters in its configuration files
    (`generation_config.json`), the following default values will be used:
    - max_new_tokens: 256

    Example:

    ```python
    >>> from transformers import pipeline

    >>> captioner = pipeline(model="ydshieh/vit-gpt2-coco-en")
    >>> captioner("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
    [{'generated_text': 'two birds are standing next to each other '}]
    ```

    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

    This image-to-text pipeline can currently be loaded from `pipeline()` using the following task identifier:
    "image-to-text".

    See the list of available models on
    [huggingface.co/models](https://huggingface.co/models?pipeline_tag=image-to-text).
    """

    _pipeline_calls_generate = True
    # Make sure the docstring above stays in sync with this default generation config
    _default_generation_config = GenerationConfig(
        max_new_tokens=256,
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        requires_backends(self, "vision")
        self.check_model_type(
            TF_MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES if self.framework == "tf" else MODEL_FOR_VISION_2_SEQ_MAPPING_NAMES
        )

    def _sanitize_parameters(self, max_new_tokens=None, generate_kwargs=None, prompt=None, timeout=None):
        forward_params = {}
        preprocess_params = {}

        if prompt is not None:
            preprocess_params["prompt"] = prompt
        if timeout is not None:
            preprocess_params["timeout"] = timeout

        if max_new_tokens is not None:
            forward_params["max_new_tokens"] = max_new_tokens
        if generate_kwargs is not None:
            if max_new_tokens is not None and "max_new_tokens" in generate_kwargs:
                raise ValueError(
                    "`max_new_tokens` is defined both as an argument and inside `generate_kwargs` argument, please use"
                    " only 1 version"
                )
            forward_params.update(generate_kwargs)

        if self.assistant_model is not None:
            forward_params["assistant_model"] = self.assistant_model
        if self.assistant_tokenizer is not None:
            forward_params["tokenizer"] = self.tokenizer
            forward_params["assistant_tokenizer"] = self.assistant_tokenizer

        return preprocess_params, forward_params, {}

    def __call__(self, inputs: Union[str, List[str], "Image.Image", List["Image.Image"]] = None, **kwargs):
        """
        Assign labels to the image(s) passed as inputs.

        Args:
            inputs (`str`, `List[str]`, `PIL.Image` or `List[PIL.Image]`):
                The pipeline handles three types of images:

                - A string containing a HTTP(s) link pointing to an image
                - A string containing a local path to an image
                - An image loaded in PIL directly

                The pipeline accepts either a single image or a batch of images.

            max_new_tokens (`int`, *optional*):
                The maximum number of tokens to generate. By default, the `generate` default is used.

            generate_kwargs (`Dict`, *optional*):
                Keyword arguments passed directly to `generate`, allowing full control of that function.

            timeout (`float`, *optional*, defaults to None):
                The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
                the call may block forever.

        Return:
            A list or a list of list of `dict`: Each result comes as a dictionary with the following key:

            - **generated_text** (`str`) -- The generated text.
        """
        # `images` is the deprecated kwarg name for `inputs`; accept it for backward compatibility.
        if "images" in kwargs:
            inputs = kwargs.pop("images")
        if inputs is None:
            raise ValueError("Cannot call the image-to-text pipeline without an inputs argument!")
        return super().__call__(inputs, **kwargs)

    def preprocess(self, image, prompt=None, timeout=None):
        image = load_image(image, timeout=timeout)

        if prompt is not None:
            logger.warning_once(
                "Passing `prompt` to the `image-to-text` pipeline is deprecated and will be removed in version 4.48"
                " of 🤗 Transformers. Use the `image-text-to-text` pipeline instead",
            )
            if not isinstance(prompt, str):
                raise ValueError(
                    f"Received an invalid text input, got - {type(prompt)} - but expected a single string. "
                    "Note also that one single text can be provided for conditional image to text generation."
                )

            model_type = self.model.config.model_type

            if model_type == "git":
                model_inputs = self.image_processor(images=image, return_tensors=self.framework)
                if self.framework == "pt":
                    model_inputs = model_inputs.to(self.torch_dtype)
                input_ids = self.tokenizer(text=prompt, add_special_tokens=False).input_ids
                input_ids = [self.tokenizer.cls_token_id] + input_ids
                input_ids = torch.tensor(input_ids).unsqueeze(0)
                model_inputs.update({"input_ids": input_ids})

            elif model_type == "pix2struct":
                model_inputs = self.image_processor(images=image, header_text=prompt, return_tensors=self.framework)
                if self.framework == "pt":
                    model_inputs = model_inputs.to(self.torch_dtype)

            elif model_type != "vision-encoder-decoder":
                # vision-encoder-decoder does not support conditional generation
                model_inputs = self.image_processor(images=image, return_tensors=self.framework)
                if self.framework == "pt":
                    model_inputs = model_inputs.to(self.torch_dtype)
                text_inputs = self.tokenizer(prompt, return_tensors=self.framework)
                model_inputs.update(text_inputs)

            else:
                raise ValueError(f"Model type {model_type} does not support conditional text generation")

        else:
            model_inputs = self.image_processor(images=image, return_tensors=self.framework)
            if self.framework == "pt":
                model_inputs = model_inputs.to(self.torch_dtype)

        if self.model.config.model_type == "git" and prompt is None:
            model_inputs["input_ids"] = None

        return model_inputs

    def _forward(self, model_inputs, **generate_kwargs):
        # GIT models set `input_ids = None` in `preprocess` when no prompt is given. Batching then
        # collates this into a list of `None`s, which `generate` cannot handle, so collapse it back.
        if (
            "input_ids" in model_inputs
            and isinstance(model_inputs["input_ids"], list)
            and all(x is None for x in model_inputs["input_ids"])
        ):
            model_inputs["input_ids"] = None

        # A user-defined `generation_config` passed at call time takes precedence over the pipeline default.
        if "generation_config" not in generate_kwargs:
            generate_kwargs["generation_config"] = self.generation_config

        inputs = model_inputs.pop(self.model.main_input_name)
        model_outputs = self.model.generate(inputs, **model_inputs, **generate_kwargs)
        return model_outputs

    def postprocess(self, model_outputs):
        records = []
        for output_ids in model_outputs:
            record = {
                "generated_text": self.tokenizer.decode(
                    output_ids,
                    skip_special_tokens=True,
                )
            }
            records.append(record)
        return records
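

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the library module): shows how
# the parameters handled by `_sanitize_parameters` above surface at call time.
# The GIT checkpoint name below is an assumption; any Hub model tagged
# "image-to-text" can be substituted. Note that `prompt` support here is
# deprecated in favor of the `image-text-to-text` pipeline, as the warning in
# `preprocess` states.
if __name__ == "__main__":
    from transformers import pipeline

    captioner = pipeline("image-to-text", model="microsoft/git-base-coco")  # assumed checkpoint
    url = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png"

    # Plain captioning; `generate_kwargs` is forwarded verbatim to `generate`.
    print(captioner(url, generate_kwargs={"max_new_tokens": 20}))

    # Conditional captioning (GIT): `preprocess` tokenizes the prompt and the
    # model continues it, so the caption starts with the given prefix.
    print(captioner(url, prompt="a photography of"))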