o
    Zh<                     @   s   d dl Z d dlmZmZmZ d dlZddlmZ ddl	m
Z
mZ ddlmZmZmZmZ ddlmZmZ ddlmZ G d	d
 d
eZG dd deddZG dd deZdgZdS )    N)ListOptionalUnion   )BatchFeature)
ImageInputmake_nested_list_of_images)ImagesKwargsProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)	to_py_objc                   @   sJ   e Zd ZU ee ed< ee ed< ee ed< ee ed< ee ed< dS )Gemma3ImagesKwargsdo_pan_and_scanpan_and_scan_min_crop_sizepan_and_scan_max_num_crops"pan_and_scan_min_ratio_to_activateZdo_convert_rgbN)__name__
__module____qualname__r   bool__annotations__intfloat r   r   [/var/www/auris/lib/python3.10/site-packages/transformers/models/gemma3/processing_gemma3.pyr      s   
 r   c                   @   s.   e Zd ZU eed< ddiddddddZd	S )
Gemma3ProcessorKwargsimages_kwargspaddingF      g333333?)r   r   r   r   )text_kwargsr   N)r   r   r   r   r   	_defaultsr   r   r   r   r   $   s   
 
r   F)totalc                
       s   e Zd ZddgZddgZdZdZ		ddef fd	d
Z				dde	de
eeee ee f dee defddZdd Zdd Zedd Z  ZS )Gemma3Processorimage_processor	tokenizerchat_templateimage_seq_lengthZAutoImageProcessorZAutoTokenizerNr!   c                    sh   || _ |j| _|j| _|j| _d|jg| }d|j | |j d| _t jd|||d| d S )N z

)r'   r(   r)   r   )	r*   image_token_id	boi_tokenZimage_tokenjoinZ	eoi_tokenfull_image_sequencesuper__init__)selfr'   r(   r)   r*   kwargsZimage_tokens_expanded	__class__r   r   r1   9   s   
zGemma3Processor.__init__imagestextr3   returnc                    sp  |d u r|d u rt djtfdjji|}t|tr"|g}nt|ts2t|d ts2t di }|d urt|}j	|fi |d }|sRfdd|D }t
|t
|krit dt
| d	t
| d
t|d  fdd|D }	tt|||	D ]a\}
\}} dd tj|D }t
|t
|krt dt
| dt
| dttt |D ]-\}}|rdj ddjg|  }|d | | ||t
j d   }|||
< qqfdd|D }|d dd }jd!d|i|d ddi}j||dgd |d }t|d }d||jk< dd | D }| |d< ti |||d S )"Nz+Provide at least one of `text` or `images`.Ztokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr   c                    s"   g | ]}d   jgt| qS ) )r.   r-   len.0r6   r2   r   r   
<listcomp>l   s   " z,Gemma3Processor.__call__.<locals>.<listcomp>z1Received inconsistently sized batches of images (z) and text (z).	num_cropsc                    s&   g | ]} fd dt t|D qS )c                    s   g | ]}  d qS )r   )pop)r<   _r?   r   r   r>   u   s    z7Gemma3Processor.__call__.<locals>.<listcomp>.<listcomp>)ranger:   r;   rB   r   r   r>   u   s   & c                 S   s   g | ]}|  qS r   )start)r<   mr   r   r   r>   w   s    zPrompt contained z image tokens but received z images.zHere is the original image z0 and here are some crops to help you see better r9   c                    s   g | ]
}|  j jqS r   )replacer-   r/   )r<   promptr=   r   r   r>      s    r#   return_tensorsr7   npimage)Z
modalitiesZ	input_ids   c                 S   s   i | ]	\}}||  qS r   )tolist)r<   kvr   r   r   
<dictcomp>   s    z,Gemma3Processor.__call__.<locals>.<dictcomp>token_type_ids)dataZtensor_typer   )
ValueErrorZ_merge_kwargsr   r(   Zinit_kwargs
isinstancestrlistr   r'   r:   r   r@   	enumerateziprefinditerr-   reversedr.   Z_check_special_mm_tokensrI   Z
zeros_liker,   itemsrL   r   )r2   r6   r7   ZvideosZaudior3   Zoutput_kwargsZimage_inputsZbatched_imagesZbatch_num_cropsZ	batch_idxrG   Zimage_indexesnumidxZformatted_image_textrH   Ztext_inputsZ	array_idsZmm_token_type_idsr   )r?   r2   r   __call__O   sf   
&
 zGemma3Processor.__call__c                 O      | j j|i |S )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r(   batch_decoder2   argsr3   r   r   r   r`         zGemma3Processor.batch_decodec                 O   r_   )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r(   decodera   r   r   r   rd      rc   zGemma3Processor.decodec                 C   s(   | j jdg }| jj}tt|| S )NrP   )r(   model_input_namesr'   rU   dictfromkeys)r2   Ztokenizer_input_namesZimage_processor_input_namesr   r   r   re      s   z!Gemma3Processor.model_input_names)Nr!   )NNNN)r   r   r   
attributesZvalid_kwargsZimage_processor_classZtokenizer_classr   r1   r   r   r   r   r   r   r   r   r^   r`   rd   propertyre   __classcell__r   r   r4   r   r&   3   s6    
Ir&   )rX   typingr   r   r   numpyrI   Zfeature_extraction_utilsr   Zimage_utilsr   r   Zprocessing_utilsr	   r
   r   r   Ztokenization_utils_baser   r   utilsr   r   r   r&   __all__r   r   r   r   <module>   s   
{