
    fTh+                         S r SSKrSSKJrJrJr  SSKJr  SSKJ	r	  SSK
JrJrJrJrJrJr  SSKJrJr  SS	KJr  S
SKJr  \R0                  " \5      r " S S\	5      rS/rg)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)ListOptionalUnion   )BatchFeature)ProcessorMixin)
AddedTokenBatchEncodingPaddingStrategyPreTokenizedInput	TextInputTruncationStrategy)
TensorTypelogging)
VideoInput   )AutoTokenizerc            $       N  ^  \ rS rSrSr/ SQrS/rSrSrSr	S U 4S jjr
                S!S\S	\\\\\   \\   4   S
\S\\\\4   S\\\\4   S\\   S\S\\   S\\   S\S\S\S\S\S\S\\\\4      S\4"S jjrS rS r\S 5       rU 4S jr\U 4S j5       rSr U =r!$ )"InstructBlipVideoProcessor(   a  
Constructs an InstructBLIPVideo processor which wraps a InstructBLIP image processor and a LLaMa/T5 tokenizer into a single
processor.

[`InstructBlipVideoProcessor`] offers all the functionalities of [`InstructBlipVideoImageProcessor`] and [`AutoTokenizer`]. See the
docstring of [`~InstructBlipVideoProcessor.__call__`] and [`~InstructBlipVideoProcessor.decode`] for more information.

Args:
    video_processor (`InstructBlipVideoVideoProcessor`):
        An instance of [`InstructBlipVideoVideoProcessor`]. The video processor is a required input.
    tokenizer (`AutoTokenizer`):
        An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
    qformer_tokenizer (`AutoTokenizer`):
        An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
    num_query_tokens (`int`, *optional*):
        Number of tokens used by the Qformer as queries, should be same as in model's config.
)video_processor	tokenizerqformer_tokenizernum_query_tokensAutoVideoProcessorr   c                    > [        US5      (       d,  [        SSSS9U l        UR                  U R                  /SS9  OUR                  U l        X@l        [
        TU ]  XU5        g )Nvideo_tokenz<video>FT)
normalizedspecial)special_tokens)hasattrr	   r   
add_tokensr   super__init__)selfr   r   r   r   kwargs	__class__s         z/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/instructblipvideo/processing_instructblipvideo.pyr$   #InstructBlipVideoProcessor.__init__A   sa    y-00))tTD  $"2"2!3D I(44D 05FG    imagestextadd_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbosereturn_tensorsreturnc                    Uc  Uc  [        S5      e[        5       nUGb  [        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S5      eU R
                  " SUUUUUUUU	U
UUUUUSS.UD6nU R                  b  Ub~  0 nU R                  R                  U R                  -  S-  nU R                  U/[        U5      -  SSS9nU H1  n[        UU   UU   5       VVs/ s H  u  nnUU-   PM     snnUU'   M3     OUnUb  [        R                  S	5        [        UUS
9nUR                  U5        U R                  " SUUUUUUUU	U
UUUUUUS.UD6nUR!                  S5      US'   UR!                  S5      US'   Ub!  U R#                  UUS9nUR                  U5        U$ s  snnf )a  
This method uses [`InstructBlipVideoImageProcessor.__call__`] method to prepare image(s) or video(s) for the model, and
[`BertTokenizerFast.__call__`] to prepare text for the model.

Please refer to the docstring of the above two methods for more information.
Nz3You have to specify at least one of images or text.r   zAInvalid input text. Please provide a string, or a list of strings)r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:      F)r-   r:   aK  Expanding inputs for video tokens in InstructBLIPVideo should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/65f22892b054dc0d68228af56fbeaac2) to update your InstructBLIPVideo model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.)tensor_type	input_idsqformer_input_idsattention_maskqformer_attention_mask)r:    )
ValueErrorr   
isinstancestrlistr   r   r   contentlenziploggerwarning_oncer
   updater   popr   )r%   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r&   encoding_text_encodingtext_encodingvideo_tokensvideo_token_encodingkimg_encodingtxt_encodingqformer_text_encodingimage_encodings                               r(   __call__#InstructBlipVideoProcessor.__call__J   sK   4 >dlRSS>$$$vd++JtAw4L4L !dee!^^ #5%%#5&;*C+E'=&;+#  !N* $$0V5G "$$,,t/D/DDqH  (,~~!NSY.5Y] (6 ($ (A ;>>RST>UWefgWh:i(:i6L, %|3:i(M!$ ( !/%''B *-^TMOOM*$($:$: %#5%%#5&;*C+E'=&;+-%  !%!$ -B,E,Ek,RH()1F1J1JK[1\H-.!11&1XNOON+S(s   Gc                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r   batch_decoder%   argsr&   s      r(   r\   'InstructBlipVideoProcessor.batch_decode   s    
 ~~**D;F;;r*   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r   decoder]   s      r(   ra   !InstructBlipVideoProcessor.decode   s    
 ~~$$d5f55r*   c                     U R                   R                  nU R                  R                  n[        [        R                  X-   5      5      $ N)r   model_input_namesimage_processorrG   dictfromkeys)r%   tokenizer_input_namesimage_processor_input_namess      r(   re   ,InstructBlipVideoProcessor.model_input_names   s>     !% @ @&*&:&:&L&L#DMM"7"UVWWr*   c                   > [         R                  R                  U5      (       a  [        SU S35      e[         R                  " USS9  [         R                  R                  US5      nU R                  R                  U5        SU R                  ;   nU(       a  U R                  R                  S5        [        TU ]  " U40 UD6nU(       a  U =R                  S/-  sl        U$ )NzProvided path (z#) should be a directory, not a fileT)exist_okr   )ospathisfilerD   makedirsjoinr   save_pretrained
attributesremover#   )r%   save_directoryr&   qformer_tokenizer_pathqformer_presentoutputsr'   s         r(   rs   *InstructBlipVideoProcessor.save_pretrained   s    77>>.))~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or*   c                    > [         TU ]  " U40 UD6n[        U[        5      (       a  US   n[        R                  " USS9nXCl        U$ )Nr   r   )	subfolder)r#   from_pretrainedrE   tupler   r   )clspretrained_model_name_or_pathr&   	processorr   r'   s        r(   r}   *InstructBlipVideoProcessor.from_pretrained   sQ    G+,ITVT	 i''!!I)99:Wcvw&7#r*   )r   r   rd   )NNTFNNr   NNFFFFFTN)"__name__
__module____qualname____firstlineno____doc__rt   valid_kwargsvideo_processor_classtokenizer_classqformer_tokenizer_classr$   r   r   r   r   r   boolrF   r   r   r   intr   r   rY   r\   ra   propertyre   rs   classmethodr}   __static_attributes____classcell__)r'   s   @r(   r   r   (   s   $ GJ&'L0%O-H "^b#'5:;?$(,004*/+0',&+#;?#ll I0$y/4HYCZZ[l !	l
 tS/12l $%778l SMl l %SMl  (~l $(l %)l !%l  $l l  !l" !sJ!78#l& 
'l^<6 X X&  r*   r   )r   rn   typingr   r   r   image_processing_utilsr   processing_utilsr   tokenization_utils_baser	   r
   r   r   r   r   utilsr   r   video_utilsr   autor   
get_loggerr   rK   r   __all__rC   r*   r(   <module>r      s[    
 ( ( 2 .  ) %   
		H	%D DN (
(r*   