
    fThO3                        S r SSKJr  SSKJrJrJrJr  SSKJ	r	  SSK
JrJrJr  SSKJrJrJrJrJr  SSKJrJr  SS	KJr  \(       a  SS
KJr  \R4                  " \5      rS\4S jrS r " S S\SS9r  " S S\SS9r! " S S\5      r"S/r#g)z
Processor class for IDEFICS2.
    )
accumulate)TYPE_CHECKINGListOptionalUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ImagesKwargsProcessingKwargsProcessorMixinUnpack!_validate_images_text_input_order)
AddedToken	TextInput)logging)PreTokenizedInputreturnc                 R    [        U [        5      =(       a    U R                  S5      $ )Nhttp)
isinstancestr
startswith)vals    h/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/idefics2/processing_idefics2.pyis_urlr   *   s    c3:CNN6$::    c                 <    [        U 5      =(       d    [        U 5      $ N)r   r   )elems    r   is_image_or_image_urlr#   .   s    $</>$//r   c                   &    \ rS rSr% \\   \S'   Srg)Idefics2ImagesKwargs2   image_seq_len N)__name__
__module____qualname____firstlineno__r   int__annotations____static_attributes__r(   r   r   r%   r%   2   s    C= r   r%   F)totalc                   2    \ rS rSr% \\S'   SSSS.0 S.rSrg)	Idefics2ProcessorKwargs6   images_kwargsTF)add_special_tokenspaddingis_split_into_words)text_kwargsr4   r(   N)r)   r*   r+   r,   r%   r.   	_defaultsr/   r(   r   r   r2   r2   6   s$    '' #'#(

 Ir   r2   c            
          ^  \ rS rSrSrSS/rSS/rSrSr SS\	S\
\   4U 4S	 jjjrS
 r    SS\\\\   \\\      4   S\\S\\   \S   4   S\\   S\4S jjrS rS r\S 5       rSrU =r$ )Idefics2ProcessorC   a  
Constructs a IDEFICS2 processor which wraps a LLama tokenizer and IDEFICS2 image processor into a single processor.

[`IdeficsProcessor`] offers all the functionalities of [`Idefics2ImageProcessor`] and [`LlamaTokenizerFast`]. See
the docstring of [`~IdeficsProcessor.__call__`] and [`~IdeficsProcessor.decode`] for more information.

Args:
    image_processor (`Idefics2ImageProcessor`):
        An instance of [`Idefics2ImageProcessor`]. The image processor is a required input.
    tokenizer (`PreTrainedTokenizerBase`, *optional*):
        An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
    image_seq_len (`int`, *optional*, defaults to 64):
        The length of the image sequence i.e. the number of <image> tokens per image in the input.
        This parameter is used to build the string from the input prompt and image tokens and should match the
        config.perceiver_config.resampler_n_latents value for the model used.
    chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
        in a chat into a tokenizable string.
image_processor	tokenizerr'   chat_templateIdefics2ImageProcessorAutoTokenizerc                 J  > Uc  [        S5      eUc  [        S5      e[        US5      (       d  [        SSSS9R                  U l        [        SSSS9R                  U l        S	U R                  U R
                  /0nUR                  U5        UR                  U R
                  5      U l        O3UR                  U l        UR
                  U l        UR                  U l        [        S
SSS9U l
        UR                  S	U R                  /05        X0l        [        TU ]5  XUS9  g )Nz)You need to specify an `image_processor`.z"You need to specify a `tokenizer`.image_tokenz<fake_token_around_image>FT)
normalizedspecialz<image>additional_special_tokensz<end_of_utterance>)r?   )
ValueErrorhasattrr   contentfake_image_tokenrC   r5   convert_tokens_to_idsimage_token_idimage_boundary_tokenend_of_utterance_tokenr'   super__init__)selfr=   r>   r'   r?   kwargstokens_to_add	__class__s          r   rP   Idefics2Processor.__init__\   s    "HIIABBy-00$./JW\fj$k$s$sD!))tT\\D84;P;PRVRbRb:cdM((7"+"A"A$BRBR"SD$-$B$BD!(44D"+":":D&01ERWae&f#$$&ADD_D_C`%ab*=Qr   c                     / nU Hn  n/ nU HR  n[        U5      (       a  UR                  U5        M&  [        U5      (       d  M8  UR                  [        U5      5        MT     UR                  U5        Mp     U$ r!   )r   appendr   r   )rQ   promptsprompt_imagespromptimagesr"   s         r   _extract_images_from_prompts.Idefics2Processor._extract_images_from_promptsu   si    FF!$''MM$'D\\MM*T"23	 
   (  r   r[   textr   rR   r   c                 @   Uc  Uc  [        S5      e[        X5      u  pU R                  " [        4SU R                  R
                  0UD6nUS   R                  SS5      nUb  UOU R                  nUS   R                  SS5      n/ n	0 n
UGb5  [        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S	5      eU R                  nU R                  nU X-   U 3nU R                  R                  (       a
  US
-  nUS
-  n/ nU H\  nU	R                  UR!                  U5      5        UR#                  X5      nUR#                  U U 3U 5      nUR                  U5        M^     U R                  " U40 US   D6nU R%                  UUS/S9  U
R'                  U5        UGb  [)        U5      (       a  U//nGO[        U[        [*        45      (       a  [)        US   5      (       a  Ub  [-        U	5      [/        U5      :w  a*  [        SW S[-        U	5       SU S[/        U5       S3	5      eS/[        [1        U	5      5      -   n[3        [/        U	5      5       Vs/ s H  nUUU   UUS-       PM     nnO^U/nOZ[        U[        [*        45      (       d?  [        US   [        [*        45      (       d!  [)        US   S   5      (       d  [        S5      eU Vs/ s H  n[/        U5      PM     nnUb  UU	:X  d  [        SU	 SU S35      eU VVs/ s H  o Vs/ s H  n[5        U5      PM     snPM      nnnU R                  " U40 US   D6nU
R'                  U5        [7        XS9$ s  snf s  snf s  snf s  snnf )a  
Processes the input prompts and returns a BatchEncoding.

Example:

```python
>>> import requests
>>> from transformers import Idefics2Processor
>>> from transformers.image_utils import load_image

>>> processor = Idefics2Processor.from_pretrained("HuggingFaceM4/idefics2-8b", image_seq_len=2)
>>> processor.image_processor.do_image_splitting = False  # Force as False to simplify the example

>>> url1 = "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg"
>>> url2 = "https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg"

>>> image1, image2 = load_image(url1), load_image(url2)
>>> images = [[image1], [image2]]

>>> text = [
...     "<image>In this image, we see",
...     "bla bla bla<image>",
... ]
>>> outputs = processor(images=images, text=text, return_tensors="pt", padding=True)
>>> input_ids = outputs.input_ids
>>> input_tokens = processor.tokenizer.batch_decode(input_ids)
>>> print(input_tokens)
['<s><fake_token_around_image><image><image><fake_token_around_image> In this image, we see', '<s> bla bla bla<fake_token_around_image><image><image><fake_token_around_image>']
```

Args:
    images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`, *optional*):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. If is of type `List[ImageInput]`, it's assumed that this is for a single prompt i.e. of batch size 1.
    text (`Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]]`, *optional*):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).

        Wherever an image token, `<image>` is encountered it is expanded to
        `<fake_token_around_image>` + `<image>` * `image_seq_len` * <fake_token_around_image>`.
    return_tensors (`Union[str, TensorType]`, *optional*):
        If set, will return tensors of a particular framework. See [`PreTrainedTokenizerFast.__call__`] for more
        information.

Nz+You must provide either `text` or `images`.tokenizer_init_kwargsr4   r'   r8   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings   image)
modalitieszThe total number of zP tokens in the prompts should be the same as the number of images passed. Found  z tokens and z images.   zdInvalid input images. Please provide a single image or a list of images or a list of list of images.z!The number of images in the text z and images  z should be the same.)tensor_type)rG   r   _merge_kwargsr2   r>   init_kwargspopr'   r   r   listrJ   rC   r=   do_image_splittingrW   countreplace_check_special_mm_tokensupdater#   tuplesumlenr   ranger   r	   )rQ   r[   r^   audiovideosrR   output_kwargsr'   ra   n_images_in_textinputsrJ   rC   	image_strprompt_stringssampletext_inputscumsum_images_in_textin_images_in_imagesimimage_inputss                         r   __call__Idefics2Processor.__call__   s   l <FNJKK8F**#
"&.."<"<
 

 &o6::?DQ)6)BHZHZ&}599:JDQ$$$vd++JtAw4L4L !dee  $44**K+,[-H,IJZI[\I##66%M	"N ''[(AB?+;*<=M<N(OTdSeg%%f-  ..X=;WXK)).+SZR[)\MM+&$V,,!(FT5M227LVTUY7W7W#+,F;(2;- @&&)*:&;%<Ak],WZ[aWbVcckm 
 ./C$zBR7S2T,T) "'s+;'<!=!=A 4Q7:OPQTUPU:VW!=  F
 %XF ve}55"6!9tUm<<-fQil;; z  =C!CF&#f+F!C(:>N(N 78H7IWiVjj~ 
 GMMfF7"z"~7fFM//Y-:XYLMM,'F??7  "D 8Ms$   ?N<N3	N<NNNc                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r>   batch_decoderQ   argsrR   s      r   r   Idefics2Processor.batch_decode  s    
 ~~**D;F;;r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r>   decoder   s      r   r   Idefics2Processor.decode  s    
 ~~$$d5f55r   c                     U R                   R                  nU R                  R                  n[        [        R                  X-   5      5      $ r!   )r>   model_input_namesr=   rk   dictfromkeys)rQ   tokenizer_input_namesimage_processor_input_namess      r   r   #Idefics2Processor.model_input_names  s<     $ @ @&*&:&:&L&L#DMM"7"UVWWr   )rN   rJ   r'   rC   rL   )N@   N)NNNN)r)   r*   r+   r,   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr-   r   r   rP   r\   r   r
   r   r   r   r2   r	   r   r   r   propertyr   r/   __classcell__)rT   s   @r   r;   r;   C   s    & $[1J#_5L4%O hlR>ARW_`cWdR R2
 OSbfJ@j$z"2Dj9I4JJKJ@ I2DOTJ]E^^_J@ 01J@ 
J@X<6 X Xr   r;   N)$r   	itertoolsr   typingr   r   r   r   feature_extraction_utilsr	   image_utilsr
   r   r   processing_utilsr   r   r   r   r   tokenization_utils_baser   r   utilsr   r   
get_loggerr)   loggerboolr   r#   r%   r2   r;   __all__r(   r   r   <module>r      s    ! 7 7 4 A A  =  < 
		H	%;4 ;0!<u !
.e 
\X \X~ 
r   