
    fTh%                        S SK JrJrJr  SSKJr  SSKJrJrJ	r	J
r
  SSKJrJr  \	" 5       (       a  S SKJr  SSKJr  \" 5       (       a  SS	KJr  SS
KJr  \
R,                  " \5      r\" \" SSS95       " S S\5      5       rg)    )ListOptionalUnion   )GenerationConfig)add_end_docstringsis_torch_availableis_vision_availablelogging   )Pipelinebuild_pipeline_init_args)Image)
load_image)1MODEL_FOR_VISUAL_QUESTION_ANSWERING_MAPPING_NAMES)
KeyDatasetT)has_tokenizerhas_image_processorc            	          ^  \ rS rSrSrSr\" SS9rU 4S jrSS jr	 SS\
S	\\S	   \\   S
4   S\\
\\\   4      4U 4S jjjrSS jrS rSS jrSrU =r$ )VisualQuestionAnsweringPipeline   aS  
Visual Question Answering pipeline using a `AutoModelForVisualQuestionAnswering`. This pipeline is currently only
available in PyTorch.

Unless the model you're using explicitly sets these generation parameters in its configuration files
(`generation_config.json`), the following default values will be used:
- max_new_tokens: 256

Example:

```python
>>> from transformers import pipeline

>>> oracle = pipeline(model="dandelin/vilt-b32-finetuned-vqa")
>>> image_url = "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/lena.png"
>>> oracle(question="What is she wearing ?", image=image_url)
[{'score': 0.948, 'answer': 'hat'}, {'score': 0.009, 'answer': 'fedora'}, {'score': 0.003, 'answer': 'clothes'}, {'score': 0.003, 'answer': 'sun hat'}, {'score': 0.002, 'answer': 'nothing'}]

>>> oracle(question="What is she wearing ?", image=image_url, top_k=1)
[{'score': 0.948, 'answer': 'hat'}]

>>> oracle(question="Is this a person ?", image=image_url, top_k=1)
[{'score': 0.993, 'answer': 'yes'}]

>>> oracle(question="Is this a man ?", image=image_url, top_k=1)
[{'score': 0.996, 'answer': 'no'}]
```

Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)

This visual question answering pipeline can currently be loaded from [`pipeline`] using the following task
identifiers: `"visual-question-answering", "vqa"`.

The models that this pipeline can use are models that have been fine-tuned on a visual question answering task. See
the up-to-date list of available models on
[huggingface.co/models](https://huggingface.co/models?filter=visual-question-answering).
T   )max_new_tokensc                 P   > [         TU ]  " U0 UD6  U R                  [        5        g N)super__init__check_model_typer   )selfargskwargs	__class__s      h/var/www/auris/envauris/lib/python3.13/site-packages/transformers/pipelines/visual_question_answering.pyr   (VisualQuestionAnsweringPipeline.__init__B   s$    $)&)OP    c                     0 0 pvUb  X&S'   Ub  X6S'   Ub  XFS'   Ub  XS'   0 n[        U SS 5      b  U R                  US'   [        U SS 5      b  U R                  US'   U R                  US'   XhU4$ )Npadding
truncationtimeouttop_kassistant_modelassistant_tokenizer	tokenizer)getattrr+   r-   r,   )	r   r*   r'   r(   r)   r!   preprocess_paramspostprocess_paramsforward_paramss	            r#   _sanitize_parameters4VisualQuestionAnsweringPipeline._sanitize_parametersF   s    02B-+2i(!.8l++2i(*/w'4*D1=040D0DN,-4.5A*...N;'484L4LN01 2DDDr%   imagezImage.Imager   questionc                   > [        U[        5      n[        U[        5      =(       a    [        S U 5       5      n[        U[        5      =(       a    [        S U 5       5      n[        U[        R                  [
        45      (       a  [        U[
        5      (       a  XS.nOU(       d  U(       a*  [        U[
        5      (       a  U Vs/ s H  oUS.PM	     nnO[        U[        R                  [
        45      (       a  U(       a  U V	s/ s H  oU	S.PM	     nn	OBU(       d  U(       a1  U(       a*  / n
U H  n	U H  nU
R                  XS.5        M     M!     U
nO Un[        TU ]   " U40 UD6nU$ s  snf s  sn	f )a  
Answers open-ended questions about images. The pipeline accepts several types of inputs which are detailed
below:

- `pipeline(image=image, question=question)`
- `pipeline({"image": image, "question": question})`
- `pipeline([{"image": image, "question": question}])`
- `pipeline([{"image": image, "question": question}, {"image": image, "question": question}])`

Args:
    image (`str`, `List[str]`, `PIL.Image`, `List[PIL.Image]` or `KeyDataset`):
        The pipeline handles three types of images:

        - A string containing a http link pointing to an image
        - A string containing a local path to an image
        - An image loaded in PIL directly

        The pipeline accepts either a single image or a batch of images. If given a single image, it can be
        broadcasted to multiple questions.
        For dataset: the passed in dataset must be of type `transformers.pipelines.pt_utils.KeyDataset`
        Example:
        ```python
        >>> from transformers.pipelines.pt_utils import KeyDataset
        >>> from datasets import load_dataset

        >>> dataset = load_dataset("detection-datasets/coco")
        >>> oracle(image=KeyDataset(dataset, "image"), question="What's in this image?")

        ```
    question (`str`, `List[str]`):
        The question(s) asked. If given a single question, it can be broadcasted to multiple images.
        If multiple images and questions are given, each and every question will be broadcasted to all images
        (same effect as a Cartesian product)
    top_k (`int`, *optional*, defaults to 5):
        The number of top labels that will be returned by the pipeline. If the provided number is higher than
        the number of labels available in the model configuration, it will default to the number of labels.
    timeout (`float`, *optional*, defaults to None):
        The maximum time in seconds to wait for fetching images from the web. If None, no timeout is set and
        the call may block forever.
Return:
    A dictionary or a list of dictionaries containing the result. The dictionaries contain the following keys:

    - **label** (`str`) -- The label identified by the model.
    - **score** (`int`) -- The score attributed by the model for that label.
c              3   b   #    U  H%  n[        U[        R                  [        45      v   M'     g 7fr   )
isinstancer   str.0items     r#   	<genexpr>;VisualQuestionAnsweringPipeline.__call__.<locals>.<genexpr>   s'     8pjobfD5;;X[J\9]9]jos   -/c              3   B   #    U  H  n[        U[        5      v   M     g 7fr   )r8   r9   r:   s     r#   r=   r>      s     >jaiY]z$PS?T?Tais   )r4   r5   )	r8   r   listallr   r9   appendr   __call__)r   r4   r5   r!   
is_datasetis_image_batchis_question_batchinputsimqquestion_image_pairsresultsr"   s               r#   rC   (VisualQuestionAnsweringPipeline.__call__Z   s;   f  z2
#E40pS8pjo8p5p&x6j3>jai>j;jeekk3/00Z#5N5N$;F

8S0I0IDIJEb9EFJFS1227H?GHx!15xFHF
0A#% B(//"0LM    *F F'"64V4% KHs   
E1E6c                    [        US   US9nU R                  US   U R                  UUS9nU R                  XPR                  S9nU R                  S:X  a  UR	                  U R
                  5      nUR                  U5        U$ )Nr4   )r)   r5   )return_tensorsr'   r(   )imagesrN   pt)r   r-   	frameworkimage_processortotorch_dtypeupdate)r   rG   r'   r(   r)   r4   model_inputsimage_featuress           r#   
preprocess*VisualQuestionAnsweringPipeline.preprocess   s    6'?G<~~:>>!	 & 
 --U>>-Z>>T!+..t/?/?@NN+r%   c                     U R                   R                  5       (       a5  SU;  a  U R                  US'   U R                   R                  " S0 UDUD6nU$ U R                   " S0 UD6nU$ )Ngeneration_config )modelcan_generater[   generate)r   rV   generate_kwargsmodel_outputss       r#   _forward(VisualQuestionAnsweringPipeline._forward   sk    ::""$$"/97;7M7M 34 JJ//R,R/RM  !JJ66Mr%   c           	         U R                   R                  5       (       a;  U Vs/ s H-  nSU R                  R                  USS9R	                  5       0PM/     sn$ X R                   R
                  R                  :  a   U R                   R
                  R                  nU R                  S:X  a1  UR                  R                  5       S   nUR                  U5      u  pVO[        SU R                   35      eUR                  5       nUR                  5       n[        XV5       VVs/ s H*  u  pxXpR                   R
                  R                  U   S.PM,     snn$ s  snf s  snnf )NanswerT)skip_special_tokensrP   r   zUnsupported framework: )scorere   )r]   r^   r-   decodestripconfig
num_labelsrQ   logitssigmoidtopk
ValueErrortolistzipid2label)	r   ra   r*   
output_idsprobsscoresidsrg   _ids	            r#   postprocess+VisualQuestionAnsweringPipeline.postprocess   s0   ::""$$ #0"/J 4>>00QU0V\\^_"/ 
 zz((333

))44~~%%,,446q9#jj/ #:4>>:J!KLL]]_F**,CbeflbrsbrT^TYezz/@/@/I/I#/NObrss!  ts   4E%1Er\   )NNNNr   )FFN)   )__name__
__module____qualname____firstlineno____doc___pipeline_calls_generater   _default_generation_configr   r2   r   r9   r   r   rC   rX   rb   rx   __static_attributes____classcell__)r"   s   @r#   r   r      s    $L  $!1"QE. 59L]Cm)<d3iUVL 5d3i01L L\	t tr%   r   N)typingr   r   r   
generationr   utilsr   r	   r
   r   baser   r   PILr   image_utilsr   models.auto.modeling_autor   pt_utilsr   
get_loggerr{   loggerr   r\   r%   r#   <module>r      sw    ( ( ) X X 4 (]$			H	% ,4UYZ[~th ~t \~tr%   