o
    Zh                     @   s   d Z ddlmZmZmZ ddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZmZ ddlmZ G dd	 d	ed
dZG dd de	d
dZeeZG dd de
ZdgZdS )z!
Processor class for Pix2Struct.
    )ListOptionalUnion   )BatchFeature)ImagesKwargsProcessingKwargsProcessorMixinUnpack)BatchEncodingPreTokenizedInput	TextInput)loggingc                   @   s:   e Zd ZU ee ed< eeeee	e e	e f  ed< dS )Pix2StructImagesKwargsmax_patchesheader_textN)
__name__
__module____qualname__r   int__annotations__r   r   r   r    r   r   c/var/www/auris/lib/python3.10/site-packages/transformers/models/pix2struct/processing_pix2struct.pyr      s   
 $r   F)totalc                
   @   s8   e Zd ZU eed< dddddddddd	ddidZd	S )
Pix2StructProcessorKwargsimages_kwargsTFr   )	add_special_tokenspaddingZstrideZreturn_overflowing_tokensZreturn_special_tokens_maskZreturn_offsets_mappingreturn_token_type_idsZreturn_lengthverboser   i   )text_kwargsr   N)r   r   r   r   r   	_defaultsr   r   r   r   r       s   
 
r   c                	       s   e Zd ZdZddgZdZdZ fddZ				dd	ee	e
ee	 ee
 f d
ee deeef fddZdd Zdd Zedd Z  ZS )Pix2StructProcessora  
    Constructs a PIX2STRUCT processor which wraps a BERT tokenizer and PIX2STRUCT image processor into a single
    processor.

    [`Pix2StructProcessor`] offers all the functionalities of [`Pix2StructImageProcessor`] and [`T5TokenizerFast`]. See
    the docstring of [`~Pix2StructProcessor.__call__`] and [`~Pix2StructProcessor.decode`] for more information.

    Args:
        image_processor (`Pix2StructImageProcessor`):
            An instance of [`Pix2StructImageProcessor`]. The image processor is a required input.
        tokenizer (Union[`T5TokenizerFast`, `T5Tokenizer`]):
            An instance of ['T5TokenizerFast`] or ['T5Tokenizer`]. The tokenizer is a required input.
    image_processor	tokenizerZPix2StructImageProcessor)ZT5TokenizerZT5TokenizerFastc                    s   d|_ t || d S )NF)r   super__init__)selfr#   r$   	__class__r   r   r&   J   s   zPix2StructProcessor.__init__Ntextkwargsreturnc           
      K   s\  |du r|du rt d| jtfd| jji|}|d dd}|du rG| jjsG|dur/|nd|d d< | j| _| jdd|i|d }|S | jjsW| j|fi |d }	n|d 	d	| | j|fi |d }	|dur| jjs|durx|nd
|d d< | jdd|i|d }d|v r|d|d< d|v r|d|d< nd}|dur|	
| |	S )a  
        This method uses [`Pix2StructImageProcessor.preprocess`] method to prepare image(s) for the model, and
        [`T5TokenizerFast.__call__`] to prepare text for the model.

        Please refer to the docstring of the above two methods for more information.
        Nz*You have to specify either images or text.Ztokenizer_init_kwargsr    r   Tr*   r   r   FZattention_maskZdecoder_attention_maskZ	input_idsZdecoder_input_idsr   )
ValueErrorZ_merge_kwargsr   r$   Zinit_kwargspopr#   Zis_vqaZcurrent_processor
setdefaultupdate)
r'   Zimagesr*   ZaudioZvideosr+   Zoutput_kwargsr   text_encodingZencoding_image_processorr   r   r   __call__N   sB   


zPix2StructProcessor.__call__c                 O      | j j|i |S )z
        This method forwards all its arguments to Pix2StructTokenizerFast's [`~PreTrainedTokenizer.batch_decode`].
        Please refer to the docstring of this method for more information.
        )r$   batch_decoder'   argsr+   r   r   r   r4         z Pix2StructProcessor.batch_decodec                 O   r3   )z
        This method forwards all its arguments to Pix2StructTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please
        refer to the docstring of this method for more information.
        )r$   decoder5   r   r   r   r8      r7   zPix2StructProcessor.decodec                 C   s"   | j j}| jj}tt|| S )N)r$   model_input_namesr#   listdictfromkeys)r'   Ztokenizer_input_namesZimage_processor_input_namesr   r   r   r9      s   z%Pix2StructProcessor.model_input_names)NNNN)r   r   r   __doc__
attributesZimage_processor_classZtokenizer_classr&   r   r   r   r   r
   r   r   r   r2   r4   r8   propertyr9   __classcell__r   r   r(   r   r"   7   s*    

:r"   N)r=   typingr   r   r   Zfeature_extraction_utilsr   Zprocessing_utilsr   r   r	   r
   Ztokenization_utils_baser   r   r   utilsr   r   r   Z
get_loggerr   loggerr"   __all__r   r   r   r   <module>   s   

f