
    fThC                         S SK JrJrJr  S SKJrJrJrJr  S SK	J
r
Jr  SSKJr  SSKJrJr   " S S\S	S
9r " S S\S	S
9rSr " S S\5      rS/rg)    )ListOptionalUnion)ImagesKwargsProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput   )BatchFeature)
ImageInputmake_flat_list_of_imagesc                   6    \ rS rSr% \\   \S'   \\   \S'   Srg)Llama4ImagesKwargs"   max_patchesresize_to_max_canvas N)	__name__
__module____qualname____firstlineno__r   int__annotations__bool__static_attributes__r       d/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/llama4/processing_llama4.pyr   r   "   s    #"4.(r   r   F)totalc                   ,    \ rS rSr% \\S'   SSS00rSrg)Llama4ProcessorKwargs'   images_kwargstext_kwargspadding_sideleftr   N)r   r   r   r   r   r   	_defaultsr   r   r   r   r"   r"   '   s    %%F
Ir   r"   a>  {{- bos_token }}
{%- if custom_tools is defined %}
    {%- set tools = custom_tools %}
{%- endif %}
{%- if not tools_in_user_message is defined %}
    {%- set tools_in_user_message = true %}
{%- endif %}
{%- if not date_string is defined %}
    {%- if strftime_now is defined %}
        {%- set date_string = strftime_now("%d %b %Y") %}
    {%- else %}
        {%- set date_string = "26 Jul 2024" %}
    {%- endif %}
{%- endif %}
{%- if not tools is defined %}
    {%- set tools = none %}
{%- endif %}

{#- This block extracts the system message, so we can slot it into the right place. #}
{%- if messages[0]['role'] == 'system' %}    
    {%- if messages[0]['content'] is string %}
        {%- set system_message = messages[0]['content']|trim %}
    {%- else %}
        {#- FIXME: The processor requires an array, always. #}
        {%- set system_message = messages[0]['content'][0]['text']|trim %}
    {%- endif %}
    {%- set messages = messages[1:] %}
    {%- set user_supplied_system_message = true %}
{%- else %}
    {%- set system_message = "" %}
    {%- set user_supplied_system_message = false %}
{%- endif %}

{#- System message if the user supplied one #}
{%- if user_supplied_system_message %}
    {{- "<|header_start|>system<|header_end|>

" }}
    {%- if tools is not none %}
        {{- "Environment: ipython
" }}
    {%- endif %}
    {%- if tools is not none and not tools_in_user_message %}
        {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }}
        {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
        {{- "Do not use variables.

" }}
        {%- for t in tools %}
            {{- t | tojson(indent=4) }}
            {{- "

" }}
        {%- endfor %}
    {%- endif %}
    {{- system_message }}
    {{- "<|eot|>" }}
{%- endif %}

{#- Custom tools are passed in a user message with some extra guidance #}
{%- if tools_in_user_message and not tools is none %}
    {#- Extract the first user message so we can plug it in here #}
    {%- if messages | length != 0 %}
        {%- set first_user_message = messages[0]['content']|trim %}
        {%- set messages = messages[1:] %}
    {%- else %}
        {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }}
{%- endif %}
    {{- '<|header_start|>user<|header_end|>

' -}}
    {{- "Given the following functions, please respond with a JSON for a function call " }}
    {{- "with its proper arguments that best answers the given prompt.

" }}
    {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }}
    {{- "Do not use variables.

" }}
    {%- for t in tools %}
        {{- t | tojson(indent=4) }}
        {{- "

" }}
    {%- endfor %}
    {{- first_user_message + "<|eot|>"}}
{%- endif %}

{%- for message in messages %}
    {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}
    {{- '<|header_start|>' + message['role'] + '<|header_end|>

' }}
        {%- if message['content'] is string %}
            {{- message['content'] }}
        {%- else %}
            {%- for content in message['content'] %}
                {%- if content['type'] == 'image' %}
                    {{- '<|image|>' }}
                {%- elif content['type'] == 'text' %}
                    {{- content['text'] }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
        {{- "<|eot|>" }}
    {%- elif 'tool_calls' in message and message.tool_calls|length > 0 %}
       {{- '<|header_start|>assistant<|header_end|>

' -}}
       {{- '<|python_start|>' }}
        {%- if message['content'] is string %}
            {{- message['content'] }}
        {%- else %}
            {%- for content in message['content'] %}
                {%- if content['type'] == 'image' %}
                    {{- '<|image|>' }}
                {%- elif content['type'] == 'text' %}
                    {{- content['text'] }}
                {%- endif %}
            {%- endfor %}
        {%- endif %}
       {{- '<|python_end|>' }}
        {%- for tool_call in message.tool_calls %}
           {{- '{"name": "' + tool_call.function.name + '", ' }}
           {{- '"parameters": ' }}
           {{- tool_call.function.arguments | tojson }}
           {{- "}" }}
        {%- endfor %}
       {{- "<|eot|>" }}
    {%- elif message.role == "tool" or message.role == "ipython" %}
        {{- "<|header_start|>ipython<|header_end|>

" }}
        {%- if message.content is mapping or message.content is iterable %}
            {{- message.content | tojson }}
        {%- else %}
            {{- message.content }}
        {%- endif %}
        {{- "<|eot|>" }}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|header_start|>assistant<|header_end|>

' }}
{%- endif %}
c                      ^  \ rS rSrSrSS/r/ SQrSrSrSSS	S
SSSSSSS\	4S\
S\4U 4S jjjrS r    SS\\   S\\\\\\   \\   4      S\\   S\4S jjrS rS r\S 5       rSrU =r$ )Llama4Processor3   a  
Constructs a Llama4 processor which wraps a [`AutoImageProcessor`] and
[`PretrainedTokenizerFast`] tokenizer into a single processor that inherits both the image processor and
tokenizer functionalities. See the [`~Llama4Processor.__call__`] and [`~Llama4Processor.decode`] for more information.
Args:
    image_processor ([`AutoImageProcessor`], *optional*):
        The image processor is a required input.
    tokenizer ([`PreTrainedTokenizer`, `PreTrainedTokenizerFast`], *optional*):
        The tokenizer is a required input.
    patch_size (`int`, *optional*, defaults to 28):
        The size of image patches for tokenization.
    img_size (`int`, *optional*, defaults to 364):
        The size of the image to be tokenized. This should correspond to the size given to the image processor.
    image_token (`str`, *optional*, defaults to `"<|image|>"`):
        The token to be used to represent an image in the text.
    downsample_factor (`int`, *optional*, defaults to 1):
        The factor by which to scale the patch size.
    start_of_img_token (`str`, *optional*, defaults to `"<|START_OF_IMG|>"`):
        The token to be used to represent the start of an image in the text.
    end_of_img_token (`str`, *optional*, defaults to `"<|END_OF_IMG|>"`):
        The token to be used to represent the end of an image in the text.
    img_patch_token (`str`, *optional*, defaults to `"<|IMG_PATCH|>"`):
        The token to be used to represent an image patch in the text.
    img_line_break_token (`str`, *optional*, defaults to `"<|IMG_LINE_BREAK|>"`):
        The token to be used to represent a line break in the text.
    tile_token (`str`, *optional*, defaults to `"TILE"`):
        The token to be used to represent an image patch in the text.
    tile_global_token (`str`, *optional*, defaults to `"TILE_GLOBAL"`):
        The token to be used to represent the cover image in the text.
    chat_template (`str`, *optional*): A Jinja template which will be used to convert lists of messages
        in a chat into a tokenizable string.
image_processor	tokenizer)chat_templateimage_token
patch_sizeimg_sizedownsample_factorstart_of_img_tokenend_of_img_tokenimg_patch_tokenimg_line_break_token
tile_tokentile_global_tokenAutoImageProcessorAutoTokenizerN   g      ?	<|image|><|image_start|><|image_end|>	<|patch|><|tile_x_separator|><|tile_y_separator|>r0   pixel_shuffle_ratioc                    > [         TU ]  XUS9  [        [        SUS-  -  5      5      U l        X0l        XPl        X`l        UR                  U R                  5      U l	        Xpl
        Xl        Xl        Xl        Xl        g )N)r.   g      ?   )super__init__r   rounddownsample_ratior0   fake_image_tokenr/   convert_tokens_to_idsimage_token_idr3   r4   r5   r7   r8   )selfr,   r-   r0   rB   rI   r/   start_of_image_tokenend_of_image_tokenpatch_tokentile_x_separator_tokentile_y_separator_tokenr.   kwargs	__class__s                 r   rF   Llama4Processor.__init__f   s{      	=Q #E#1Da1G*H$I J$ 0&'==d>N>NO"6 2*0!7r   c                     SnUu  pEXE-  S:  a@  [        U5       H1  n[        U5       H  nUSU-  -  nXuS-
  :  d  M  US-  nM     US-  nM3     US-  nUSU-  -  nUS-  nU$ )z
Create a structured string representation of image tokens

Args:
   num_patches: Number of patches in the image

Returns:
    String with appropriate image tokens
r=      r?   r@   rA   r<   r>   )range)rL   aspect_rationum_patches_per_chunk
img_stringratio_hratio_wyyxxs           r   _prompt_split_image#Llama4Processor._prompt_split_image   s     '
'q Gn.B+0E"EEJaK'"&<<
 )
 44
 % 	k!
k$999
o%
r   imagestextrR   returnc                   ^  Uc  [        S5      eT R                  " [        4ST R                  R                  0UD6n[        U[        [        45      (       d  U/n0 nUGb  [        U5      nT R                  " SSU0US   D6nUS   S   R                  SS u  p[        UT R                  -  U	T R                  -  -  T R                  -  5      n
UR                  S	5      n[        U 4S
 jU 5       5      nU[!        U5      :w  a  [        SU S[!        U5       S35      eSn/ nU H  nUR#                  T R$                  5      nUS:X  a  UR'                  U5        M7  UR)                  T R$                  5      n/ n[+        U5       HI  u  nnUR'                  U5        UU:  d  M  T R-                  X   U
5      nUS-  nUR'                  U5        MK     UR'                  SR/                  U5      5        M     U[!        U5      :w  a  [        S5      eUnUS   R                  SS5      nT R                  " U40 US   D6nT R1                  UUS/S9  [3        0 UEUEUS9$ )a  
Main method to prepare for the model one or several sequences(s) and image(s). This method forwards the `text`
and `kwargs` arguments to PreTrainedTokenizerFast's [`~PreTrainedTokenizerFast.__call__`] to encode the text.
To prepare the vision inputs, this method forwards the `images` and `kwargs` arguments to
Llama4ImageProcessor's [`~Llama4ImageProcessor.__call__`] if `images` is not `None`.

Args:
    images (`PIL.Image.Image`, `np.ndarray`, `torch.Tensor`, `List[PIL.Image.Image]`, `List[np.ndarray]`, `List[torch.Tensor]`):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. Both channels-first and channels-last formats are supported.
    text (`str`, `List[str]`, `List[List[str]]`):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
    return_tensors (`str` or [`~utils.TensorType`], *optional*):
        If set, will return tensors of a particular framework. Acceptable values are:
        - `'tf'`: Return TensorFlow `tf.constant` objects.
        - `'pt'`: Return PyTorch `torch.Tensor` objects.
        - `'np'`: Return NumPy `np.ndarray` objects.
        - `'jax'`: Return JAX `jnp.ndarray` objects.

Returns:
    [`BatchFeature`]: A [`BatchFeature`] with the following fields:

    - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
    - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
      `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
      `None`).
    - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
NzYou have to specify text.tokenizer_init_kwargsra   r$   pixel_valuesr   aspect_ratiosc              3   X   >#    U  H  oR                  TR                  5      v   M!     g 7fN)countrI   ).0promptrL   s     r   	<genexpr>+Llama4Processor.__call__.<locals>.<genexpr>   s#     $\W[V\\$2G2G%H%HW[s   '*zFound z) placeholders across the batch, but have z flattened images.rV    zONumber of image placeholders in the prompt does not match the number of images.r%   return_tensorsimage)
modalities)datatensor_typer   )
ValueError_merge_kwargsr"   r-   init_kwargs
isinstancelisttupler   r,   shaper   r0   rH   popsumlenrk   rI   appendsplit	enumerater_   join_check_special_mm_tokensr   )rL   ra   rb   audiovideosrR   output_kwargsimage_inputsimage_heightimage_widthrY   rh   total_placeholdersimage_indexprocessed_textrm   placeholder_countprompt_splits
new_promptlocal_image_index
split_parttokens_for_this_imagerq   text_inputss   `                       r   __call__Llama4Processor.__call__   s   L <899**!
"&.."<"<
 
 $u..6D -f5F//`v`A_`L(4^(DQ(G(M(Mbc(R%L$'0[DOO5STX\XmXmm%! ),,_=M!$$\W[$\!\!S[0 /0 1  #F},>@ 
 KN$*LL1F1F$G!$)"))&1 &T-B-B C
5>}5M1%z%%j1(+<<040H0H)68M1- $q("))*?@ 6N %%bggj&9:! $ c&k) !rss!D&}599:JDQnnTJ]=-IJ%%dKWI%N!@K!@<!@n]]r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r-   batch_decoderL   argsrR   s      r   r   Llama4Processor.batch_decode  s    
 ~~**D;F;;r   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r-   decoder   s      r   r   Llama4Processor.decode	  s    
 ~~$$d5f55r   c                     U R                   R                  nU R                  R                  n[        U5      [        U5      -   $ rj   )r-   model_input_namesr,   rz   )rL   tokenizer_input_namesimage_processor_input_namess      r   r   !Llama4Processor.model_input_names  s;     $ @ @&*&:&:&L&L#)*T2M-NNNr   )
rH   r4   rI   r/   rK   r5   r0   r3   r8   r7   )NNNN)r   r   r   r   __doc__
attributesvalid_kwargsimage_processor_classtokenizer_classr.   r   floatrF   r_   r   r   r   r   r
   r   r	   r"   r   r   r   r   propertyr   r   __classcell__)rS   s   @r   r*   r*   3   s    B $[1JL 1%O %($.*55#8 	8
 #8 8<: (,hla^$a^ uY(94	?DQbLccdea^ ./a^ 
a^F<6 O Or   r*   N)typingr   r   r   transformers.processing_utilsr   r   r   r	   $transformers.tokenization_utils_baser
   r   image_processing_utilsr   image_utilsr   r   r   r"   r.   r*   __all__r   r   r   <module>r      sf   " ) (  N 2)U )
,E  ]PaOn aOH 
r   