# transformers/models/aria/processing_aria.py
from typing import Dict, List, Optional, Union

from ...image_processing_utils import BatchFeature
from ...image_utils import ImageInput
from ...processing_utils import ProcessingKwargs, ProcessorMixin, Unpack
from ...tokenization_utils import PreTokenizedInput, TextInput
from ...utils import TensorType
from ..auto import AutoTokenizer


class AriaProcessorKwargs(ProcessingKwargs, total=False):
    _defaults = {
        "text_kwargs": {"padding": False},
        "images_kwargs": {"max_image_size": 980, "split_image": False},
        "return_tensors": TensorType.PYTORCH,
    }


class AriaProcessor(ProcessorMixin):
    """
    AriaProcessor is a processor for the Aria model which wraps the Aria image preprocessor and the LLama slow tokenizer.

    Args:
        image_processor (`AriaImageProcessor`, *optional*):
            The AriaImageProcessor to use for image preprocessing.
        tokenizer (`PreTrainedTokenizerBase`, *optional*):
            An instance of [`PreTrainedTokenizerBase`]. This should correspond with the model's text model. The tokenizer is a required input.
        chat_template (`str`, *optional*):
            A Jinja template which will be used to convert lists of messages in a chat into a tokenizable string.
        size_conversion (`Dict`, *optional*):
            A dictionary indicating size conversions for images.
    """

    attributes = ["image_processor", "tokenizer"]
    valid_kwargs = ["chat_template", "size_conversion"]
    image_processor_class = "AriaImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __init__(
        self,
        image_processor=None,
        tokenizer: Union[AutoTokenizer, str] = None,
        chat_template: Optional[str] = None,
        size_conversion: Optional[Dict[Union[float, int], int]] = None,
    ):
        if size_conversion is None:
            size_conversion = {490: 128, 980: 256}
        self.size_conversion = {int(k): v for k, v in size_conversion.items()}

        self.image_token = tokenizer.image_token
        self.image_token_id = tokenizer.image_token_id
        if tokenizer is not None and tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.unk_token

        super().__init__(image_processor, tokenizer, chat_template=chat_template)

    def __call__(
        self,
        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]],
        images: Optional[ImageInput] = None,
        audio=None,
        videos=None,
        **kwargs: Unpack[AriaProcessorKwargs],
    ) -> BatchFeature:
        """
        Main method to prepare for the model one or several sequences(s) and image(s).

        Args:
            text (`TextInput`, `PreTokenizedInput`, `List[TextInput]`, `List[PreTokenizedInput]`):
                The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
                (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
                `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
            images (`ImageInput`):
                The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
                tensor. Both channels-first and channels-last formats are supported.

        Returns:
            [`BatchFeature`]: A [`BatchFeature`] with the following fields:
            - **input_ids** -- List of token ids to be fed to a model. Returned when `text` is not `None`.
            - **attention_mask** -- List of indices specifying which tokens should be attended to by the model (when
            `return_attention_mask=True` or if *"attention_mask"* is in `self.model_input_names` and if `text` is not
            `None`).
            - **pixel_values** -- Pixel values to be fed to a model. Returned when `images` is not `None`.
            - **pixel_mask** -- Pixel mask to be fed to a model. Returned when `images` is not `None`.
        """
        output_kwargs = self._merge_kwargs(
            AriaProcessorKwargs, tokenizer_init_kwargs=self.tokenizer.init_kwargs, **kwargs
        )

        if isinstance(text, str):
            text = [text]
        elif not isinstance(text, list) and not isinstance(text[0], str):
            raise ValueError("Invalid input text. Please provide a string, or a list of strings")

        if images is not None:
            image_inputs = self.image_processor(images, **output_kwargs["images_kwargs"])
            # Expand the image token according to the number of crops and the tokens per image.
            tokens_per_image = self.size_conversion[image_inputs.pixel_values.shape[2]]
            prompt_strings = []
            num_crops = image_inputs.pop("num_crops") * tokens_per_image
            for sample in text:
                sample = sample.replace(self.tokenizer.image_token, self.tokenizer.image_token * num_crops)
                prompt_strings.append(sample)
        else:
            image_inputs = {}
            prompt_strings = text

        return_tensors = output_kwargs["text_kwargs"].pop("return_tensors", None)
        text_inputs = self.tokenizer(prompt_strings, **output_kwargs["text_kwargs"])
        self._check_special_mm_tokens(prompt_strings, text_inputs, modalities=["image"])

        return BatchFeature(data={**text_inputs, **image_inputs}, tensor_type=return_tensors)

    def batch_decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        This method forwards all its arguments to LlamaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        """
        return self.tokenizer.decode(*args, **kwargs)

    @property
    def model_input_names(self):
        tokenizer_input_names = self.tokenizer.model_input_names
        image_processor_input_names = self.image_processor.model_input_names
        # `num_crops` is popped and consumed inside `__call__`, so it is not a model input.
        image_processor_input_names = [name for name in image_processor_input_names if name != "num_crops"]
        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))


__all__ = ["AriaProcessor"]
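
# ---------------------------------------------------------------------------
# Usage sketch, kept in comments so it never runs on import. The checkpoint
# name "rhymes-ai/Aria" and the local image path are illustrative assumptions,
# not taken from this module; everything else follows the API defined above.
#
#     from PIL import Image
#     from transformers import AriaProcessor
#
#     processor = AriaProcessor.from_pretrained("rhymes-ai/Aria")
#     image = Image.open("example.jpg")
#     # Put the tokenizer's image token in the prompt; __call__ expands it to
#     # one copy per crop (num_crops * tokens_per_image) before tokenizing.
#     prompt = f"{processor.tokenizer.image_token} Describe this image."
#     inputs = processor(text=prompt, images=image, return_tensors="pt")
#     # `inputs` is a BatchFeature with input_ids, attention_mask,
#     # pixel_values and pixel_mask.
# ---------------------------------------------------------------------------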