
    fTh(                         S r SSKrSSKJrJr  SSKJr  SSKJr  SSK	J
r
JrJr  SSKJrJrJrJr  SS	KJr  S
SKJr  \R,                  " \5      r " S S\
SS9r " S S\5      rS/rg)zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
    N)ListUnion   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenBatchEncodingPreTokenizedInput	TextInput)logging   )AutoTokenizerc            
       2    \ rS rSrSSSSSSSSSS.	0 S.rSrg)	InstructBlipProcessorKwargs&   TFr   )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverbose)text_kwargsimages_kwargs N)__name__
__module____qualname____firstlineno__	_defaults__static_attributes__r        p/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/instructblip/processing_instructblip.pyr   r   &   s0     #').*/&+%*"

 Ir'   r   F)totalc            
          ^  \ rS rSrSr/ SQrS/rSrSrSr	SU 4S jjr
    SS\S	\\\\\   \\   4   S
\\   S\4S jjrS rS r\S 5       rU 4S jr\U 4S j5       rSrU =r$ )InstructBlipProcessor7   aX  
Constructs an InstructBLIP processor which wraps a BLIP image processor and a LLaMa/T5 tokenizer into a single
processor.

[`InstructBlipProcessor`] offers all the functionalities of [`BlipImageProcessor`] and [`AutoTokenizer`]. See the
docstring of [`~BlipProcessor.__call__`] and [`~BlipProcessor.decode`] for more information.

Args:
    image_processor (`BlipImageProcessor`):
        An instance of [`BlipImageProcessor`]. The image processor is a required input.
    tokenizer (`AutoTokenizer`):
        An instance of ['PreTrainedTokenizer`]. The tokenizer is a required input.
    qformer_tokenizer (`AutoTokenizer`):
        An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
    num_query_tokens (`int`, *optional*):"
        Number of tokens used by the Qformer as queries, should be same as in model's config.
)image_processor	tokenizerqformer_tokenizernum_query_tokens)BlipImageProcessorBlipImageProcessorFastr   c                    > [        US5      (       d,  [        SSSS9U l        UR                  U R                  /SS9  OUR                  U l        X@l        [
        TU ]  XU5        g )Nimage_tokenz<image>FT)
normalizedspecial)special_tokens)hasattrr   r4   
add_tokensr0   super__init__)selfr-   r.   r/   r0   kwargs	__class__s         r(   r;   InstructBlipProcessor.__init__P   sa    y-00))tTD  $"2"2!3D I(44D 05FGr'   imagestextr=   returnc                    Uc  Uc  [        S5      eU R                  " [        4SU R                  R                  0UD6n[        5       nUGb  [        U[        5      (       a  U/nO8[        U[        5      (       d#  [        US   [        5      (       d  [        S5      eUS   R                  SS5      nU R                  " U40 US   DSS0D6n	XS   S'   U R                  by  Ubv  0 n
U R                  R                  U R                  -  nU R                  U/[        U5      -  SSS	9nU	 H,  n[        X   X   5       VVs/ s H	  u  pX-   PM     snnX'   M.     OU	n
Ub  [        R!                  S
5        [#        XS9n
UR%                  U
5        U R&                  " U40 US   D6nUR                  S5      US'   UR                  S5      US'   Ub'  U R(                  " U40 US   D6nUR%                  U5        U$ s  snnf )aP  
This method uses [`BlipImageProcessor.__call__`] method to prepare image(s) for the model, and
[`BertTokenizerFast.__call__`] to prepare text for the model.

Please refer to the docstring of the above two methods for more information.
Args:
    images (`ImageInput`):
        The image or batch of images to be prepared. Each image can be a PIL image, NumPy array or PyTorch
        tensor. Both channels-first and channels-last formats are supported.
    text (`TextInput`, `PreTokenizedInput`, `List[TextInput]`, `List[PreTokenizedInput]`):
        The sequence or batch of sequences to be encoded. Each sequence can be a string or a list of strings
        (pretokenized string). If the sequences are provided as list of strings (pretokenized), you must set
        `is_split_into_words=True` (to lift the ambiguity with a batch of sequences).
Nz,You have to specify at least images or text.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr   return_tensorsF)r   rE   aA  Expanding inputs for image tokens in InstructBLIP should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your InstructBLIP model. Using processors without these attributes in the config is deprecated and will throw an error in v4.50.)tensor_type	input_idsqformer_input_idsattention_maskqformer_attention_maskr   )
ValueError_merge_kwargsr   r.   init_kwargsr   
isinstancestrlistpopr0   r4   contentlenziploggerwarning_oncer   updater/   r-   )r<   r@   rA   audiovideosr=   output_kwargsencodingrE   _text_encodingtext_encodingimage_tokensimage_token_encodingkimg_encodingtxt_encodingqformer_text_encodingimage_encodings                     r(   __call__InstructBlipProcessor.__call__Y   s1   , >dlKLL**'
"&.."<"<
 
  >$$$vd++JtAw4L4L !dee +=9==>NPTUN!^^DfM-4PfaefN=K-()9: $$0V5G "#//77$:O:OO'+~~!NSY.5Y] (6 ($ (A ;>>R>UWeWh:i(:i6L %3:i(M$ ( !/%''B *-TMOOM*$($:$:4$`=Q^C_$`!,A,E,Ek,RH()1F1J1JK[1\H-.!11&[M/<Z[NOON+3(s   ?G8c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.batch_decode`]. Please
refer to the docstring of this method for more information.
)r.   batch_decoder<   argsr=   s      r(   rh   "InstructBlipProcessor.batch_decode   s    
 ~~**D;F;;r'   c                 :    U R                   R                  " U0 UD6$ )z
This method forwards all its arguments to PreTrainedTokenizer's [`~PreTrainedTokenizer.decode`]. Please refer to
the docstring of this method for more information.
)r.   decoderi   s      r(   rm   InstructBlipProcessor.decode   s    
 ~~$$d5f55r'   c                     U R                   R                  nU R                  R                  n[        [        R                  X-   5      5      $ N)r.   model_input_namesr-   rP   dictfromkeys)r<   tokenizer_input_namesimage_processor_input_namess      r(   rq   'InstructBlipProcessor.model_input_names   s>     !% @ @&*&:&:&L&L#DMM"7"UVWWr'   c                   > [         R                  R                  U5      (       a  [        SU S35      e[         R                  " USS9  [         R                  R                  US5      nU R                  R                  U5        SU R                  ;   nU(       a  U R                  R                  S5        [        TU ]  " U40 UD6nU(       a  U =R                  S/-  sl        U$ )NzProvided path (z#) should be a directory, not a fileT)exist_okr/   )ospathisfilerK   makedirsjoinr/   save_pretrained
attributesremover:   )r<   save_directoryr=   qformer_tokenizer_pathqformer_presentoutputsr>   s         r(   r~   %InstructBlipProcessor.save_pretrained   s    77>>.))~.>>abcc
NT2!#n>Q!R../EF .@OO""#67').CFCOO 344Or'   c                    > [         TU ]  " U40 UD6n[        U[        5      (       a  US   n[        R                  " USS9nXCl        U$ )Nr   r/   )	subfolder)r:   from_pretrainedrN   tupler   r/   )clspretrained_model_name_or_pathr=   	processorr/   r>   s        r(   r   %InstructBlipProcessor.from_pretrained   sQ    G+,ITVT	 i''!!I)99:Wcvw&7#r'   )r4   r0   rp   )NNNN)r!   r"   r#   r$   __doc__r   valid_kwargsimage_processor_classtokenizer_classqformer_tokenizer_classr;   r   r   r   r   r   r
   r   r   re   rh   rm   propertyrq   r~   classmethodr   r&   __classcell__)r>   s   @r(   r+   r+   7   s    $ GJ&'LL%O-H "^bMM I0$y/4HYCZZ[M 45M 
M`<6 X X&  r'   r+   )r   ry   typingr   r   image_processing_utilsr   image_utilsr   processing_utilsr   r	   r
   tokenization_utils_baser   r   r   r   utilsr   autor   
get_loggerr!   rU   r   r+   __all__r    r'   r(   <module>r      sj    
  2 % H H     
		H	%"2% "eN eP #
#r'   