
    fTh+                         S r SSKJrJr  SSKJr  SSKJrJrJ	r	  SSK
JrJrJrJr  SSKJrJr  SSKJr  \R(                  " \5      r " S	 S
\SS9rS\4S jrS r " S S\5      rS/rg)z
Processor class for Pixtral.
    )ListUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ProcessingKwargsProcessorMixinUnpack!_validate_images_text_input_order)PreTokenizedInput	TextInput)loggingc                   (    \ rS rSrSS00 SS0S.rSrg)	PixtralProcessorKwargs   paddingFreturn_tensorspt)text_kwargsimages_kwargscommon_kwargs N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r       f/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/pixtral/processing_pixtral.pyr   r      s$     u
 d
Ir!   r   F)totalreturnc                 R    [        U [        5      =(       a    U R                  S5      $ )Nhttp)
isinstancestr
startswith)vals    r"   is_urlr+   ,   s    c3:CNN6$::r!   c                 <    [        U 5      =(       d    [        U 5      $ N)r+   r   )elems    r"   is_image_or_image_urlr/   1   s    $</>$//r!   c            
          ^  \ rS rSrSrSS/r/ SQrSrSr        SS\	S	\	4U 4S
 jjjr
    SS\S\\\\\   \\   4   S\\   S\4S jjrS rS r\S 5       rSrU =r$ )PixtralProcessor5   a  
Constructs a Pixtral processor which wraps a Pixtral image processor and a Pixtral tokenizer into a single processor.

[`PixtralProcessor`] offers all the functionalities of [`CLIPImageProcessor`] and [`LlamaTokenizerFast`]. See the
[`~PixtralProcessor.__call__`] and [`~PixtralProcessor.decode`] for more information.

Args:
    image_processor ([`PixtralImageProcessor`], *optional*):
        The image processor is a required input.
    tokenizer ([`LlamaTokenizerFast`], *optional*):
        The tokenizer is a required input.
    patch_size (`int`, *optional*, defaults to 16):
        Patch size from the vision tower.
    spatial_merge_size (`int`, *optional*, defaults to 1):
        The downsampling factor for the spatial merge operation.
    chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
        in a chat into a tokenizable string.
    image_token (`str`, *optional*, defaults to `"[IMG]"`):
        Special token used to denote image location.
    image_break_token (`str`, *optional*, defaults to `"[IMG_BREAK]"`):
        Special token used to denote the end of a line of pixels in an image.
    image_end_token (`str`, *optional*, defaults to `"[IMG_END]"`):
        Special token used to denote the end of an image input.
image_processor	tokenizer)chat_template
patch_sizespatial_merge_sizeimage_tokenimage_break_tokenimage_end_tokenAutoImageProcessorAutoTokenizerr6   r7   c	                    > X0l         X@l        X`l        UR                  U R                  5      U l        Xpl        Xl        [        T
U ]!  XUS9  g )N)r5   )	r6   r7   r8   convert_tokens_to_idsimage_token_idr9   r:   super__init__)selfr3   r4   r6   r7   r5   r8   r9   r:   kwargs	__class__s             r"   rA   PixtralProcessor.__init__[   sM     %"4&'==d>N>NO!2.=Qr!   imagestextrC   r$   c                    [        X5      u  pU R                  " [        4SU R                  R                  0UD6nU R
                  U R                  -  nUGb  [        U5      (       a  U/nO[        U[        [        45      (       a  [        US   5      (       a  Ow[        U[        [        45      (       aQ  [        US   [        [        45      (       a3  [        US   S   5      (       a  U VV	s/ s H  o  H  oPM     M     nnn	O[        S5      eU V
s/ s H%  n
[        U
[        5      (       a  [        U
5      OU
PM'     nn
U R                  " U4SU0US   D6nO0 n[        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S5      eUnUR                  S5      Gb'  [!        US	   5      n/ n/ nU GH  nU R"                  U;   a  [%        U5      u  nnUU-  nUU-  nU R"                  /U-  U R&                  /-   /U-  nU VVs/ s H  o  H  nUPM     M     nnnU R(                  US
'   SR+                  U5      nUR-                  U5        UR/                  U R"                  SS5      nU R"                  U;   a  M  SU;   a,  UR1                  S5      nUR/                  SUS5      nSU;   a  M,  UR-                  U5        GM     US   R1                  SS5      nU R                  " U40 US   D6nU R3                  UUS/S9  [5        0 UEUEUS9$ s  sn	nf s  sn
f s  snnf )a  
Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
and `kwargs` arguments to LlamaTokenizerFast's [`~LlamaTokenizerFast.__call__`] if `text` is not `None` to encode
the text. To prepare the image(s), this method forwards the `images` and `kwrags` arguments to
CLIPImageProcessor's [`~CLIPImageProcessor.__call__`] if `images` is not `None`. Please refer to the docstring
of the above two methods for more information.

Args:
    images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. Both channels-first and channels-last formats are supported.
    text (`str`, `List[str]`, `List[List[str]]`):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
    return_tensors (`str` or [`~utils.TensorType`], *optional*):
        If set, will return tensors of a particular framework. Acceptable values are:

        - `'tf'`: Return TensorFlow `tf.constant` objects.
        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return NumPy `np.ndarray` objects.
        - `'jax'`: Return JAX `jnp.ndarray` objects.

Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
    `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
tokenizer_init_kwargsNr   zdInvalid input images. Please provide a single image, a list of images, or a list of lists of images.r6   r   zAInvalid input text. Please provide a string, or a list of stringspixel_valuesimage_sizes z<placeholder>   r   r   image)
modalities)datatensor_type)r   _merge_kwargsr   r4   init_kwargsr6   r7   r/   r'   listtuple
ValueErrorr(   r	   r3   getiterr8   nextr9   r:   joinappendreplacepop_check_special_mm_tokensr   )rB   rF   rG   audiovideosrC   output_kwargsr6   sublistrO   imimage_inputsprompt_stringsrK   replace_stringssampleheightwidthnum_height_tokensnum_width_tokensreplace_tokensitemreplace_strr   text_inputss                            r"   __call__PixtralProcessor.__call__o   sT   R 9F**"
"&.."<"<
 
 __t'>'>>
$V,, FT5M227LVTUY7W7W6D%=11vay4-88)&)A,77/5KvG7%%7%vK z  OUUf
2s(;(;jnCfFU//p:pQ^_nQopLLdC  6DD$''
47C0H0H`aa N+7|M:;KN O&&&0$($5MFE(.*(<%',
':$))*-==AWAW@XX&)&*N ;I%].wU\TdU\d.N%])-)=)=N2&"$''."9K#**;7#^^D,<,<oqQF &&&0 &/"1"5"5a"8K#^^O[!LF &/ %%f-% ( '}599:JDQnn^T}]7ST%%nkwi%X!@K!@<!@n]]] L
 V6 &^s   /L7,L=,Mc                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r4   batch_decoderB   argsrC   s      r"   rt   PixtralProcessor.batch_decode   s    
 ~~**D;F;;r!   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r4   decoderu   s      r"   ry   PixtralProcessor.decode   s    
 ~~$$d5f55r!   c                     U R                   R                  nU R                  R                  n[        [        R                  X-   5      5      $ r-   )r4   model_input_namesr3   rU   dictfromkeys)rB   tokenizer_input_namesimage_processor_input_namess      r"   r|   "PixtralProcessor.model_input_names   s>     !% @ @&*&:&:&L&L#DMM"7"UVWWr!   )r9   r:   r8   r?   r6   r7   )NN   rN   Nz[IMG]z[IMG_BREAK]z	[IMG_END])NNNN)r   r   r   r   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classintrA   r   r   r   r   r   r   r   r   rq   rt   ry   propertyr|   r    __classcell__)rD   s   @r"   r1   r1   5   s    2 $[1JL 1%O "#'#R 	R
  R R, "^bk^k^ I0$y/4HYCZZ[k^ /0k^ 
k^\<6 X Xr!   r1   N)r   typingr   r   feature_extraction_utilsr   image_utilsr   r   r	   processing_utilsr
   r   r   r   tokenization_utils_baser   r   utilsr   
get_loggerr   loggerr   boolr+   r/   r1   __all__r   r!   r"   <module>r      sq     4 A A k k C  
		H	%	-U 	;4 ;
0|X~ |X~ 
r!   