
    fTh_r                        S r SSKJr  SSKJrJrJrJr  SSKrSSK	rSSKJ
r
  SSKJrJrJr  SSKJr  SS	KJr  SS
KJr  SSKJr  SSKJr  SSKJrJrJrJrJrJr  SSK J!r!  SSK"J#r#  \RH                  " \%5      r&\ " S S\5      5       r'\ " S S\5      5       r( " S S\
RR                  5      r*\ " S S\5      5       r+\" SS9 " S S\+5      5       r, " S S\\5      r-\" S S9 " S! S"\+\5      5       r./ S#Qr/g)$zPyTorch PaliGemmamodel.    )	dataclass)ListOptionalTupleUnionN)nn   )CacheHybridCacheStaticCache)GenerationMixin)FlashAttentionKwargs)BaseModelOutputWithPast)PreTrainedModel)Unpack)
LossKwargsModelOutputauto_docstringcan_return_tupleis_torchdynamo_compilinglogging   )	AutoModel   )PaliGemmaConfigc                   B    \ rS rSr% SrSr\\R                     \	S'   Sr
g)PaligemmaModelOutputWithPast&   a  
Base class for Paligemma outputs, with hidden states and attentions.

Args:
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
        `(batch_size, num_heads, sequence_length, embed_size_per_head)`)

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
        one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder and after projecting the last hidden state.
Nimage_hidden_states )__name__
__module____qualname____firstlineno____doc__r   r   torchFloatTensor__annotations____static_attributes__r        h/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/paligemma/modeling_paligemma.pyr   r   &   s    8 8<%"3"34;r*   r   c                   &   \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\\R                     \4      \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Sr\\R                     \	S	'   S
rg)PaliGemmaCausalLMOutputWithPastG   aE  
Base class for PaliGemma causal language model (or autoregressive) outputs.

Args:
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided):
        Language modeling loss (for next-token prediction).
    logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.text_config.vocab_size)`):
        Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
    past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`):
        Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape
        `(batch_size, num_heads, sequence_length, embed_size_per_head)`)

        Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
        `past_key_values` input) to speed up sequential decoding.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, +
        one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
    image_hidden_states (`torch.FloatTensor`, *optional*):
        A `torch.FloatTensor` of size `(batch_size, num_images, sequence_length, hidden_size)`.
        image_hidden_states of the model produced by the vision encoder after projecting last hidden state.
Nlosslogitspast_key_valueshidden_states
attentionsr   r    )r!   r"   r#   r$   r%   r/   r   r&   r'   r(   r0   r1   r   r   r
   r2   r   r3   r   r)   r    r*   r+   r-   r-   G   s    < )-D(5$$
%,*.FHU&&'.GKOXeD):):$;U$BCDK8<M8E%"3"345<59Ju001297;%"3"34;r*   r-   c                   6   ^  \ rS rSrS\4U 4S jjrS rSrU =r$ )PaliGemmaMultiModalProjectoro   configc                    > [         TU ]  5         [        R                  " UR                  R
                  UR                  R                  SS9U l        g )NTbias)super__init__r   Linearvision_confighidden_sizeprojection_dimlinearselfr7   	__class__s     r+   r<   %PaliGemmaMultiModalProjector.__init__p   s;    ii 4 4 @ @&BVBVBeBelpqr*   c                 (    U R                  U5      nU$ NrA   )rC   image_featuresr2   s      r+   forward$PaliGemmaMultiModalProjector.forwardt   s    N3r*   rH   )	r!   r"   r#   r$   r   r<   rJ   r)   __classcell__rD   s   @r+   r5   r5   o   s    r r r*   r5   c                   H    \ rS rSr\rSrSrS/rSr	Sr
SrSrSrSrSrS rSrg)	PaliGemmaPreTrainedModelz    Tr5   r1   c                 b   [        U R                  SU R                  R                  5       R                  5      n[	        U[
        R                  5      (       aW  UR                  R                  R                  SUS9  UR                  b%  UR                  R                  R                  5         g g g )Ninitializer_range        )meanstd)getattrr7   get_text_configrS   
isinstancer   r=   weightdatanormal_r:   zero_)rC   modulerV   s      r+   _init_weights&PaliGemmaPreTrainedModel._init_weights   s     dkk#68S8S8U8g8ghfbii((MM&&CS&9{{&  &&( ' )r*   r    N)r!   r"   r#   r$   r   config_classbase_model_prefixsupports_gradient_checkpointing_no_split_modules_skip_keys_device_placement_supports_cache_class_supports_quantized_cache_supports_static_cache_supports_flash_attn_2_supports_sdpa_supports_attention_backendr_   r)   r    r*   r+   rO   rO   z   sJ    "L&*#78"3  $!!N"&)r*   rO   z{
    The Base Paligemma model which consists of a vision backbone and a language model withou language modeling head.,
    )custom_introc            #         ^  \ rS rSrSS0rS\4U 4S jjrS rS r     SS\	\
   4S	 jjrS
\R                  4S jr\\             SS\R"                  S
\R                  S\	\R$                     S\	\R"                     S\	\\\R                     \4      S\	\R"                     S\	\R"                     S\	\R                     S\	\R"                     S\	\
   S\	\
   S\	\
   S\	\
   S\\   S\\\4   4S jj5       5       rSrU =r$ )PaliGemmaModel   zlanguage_model.modellanguage_modelr7   c                   > [         TU ]  U5        [        R                  " UR                  S9U l        [        U5      U l        UR                  R                  U l	        [        R                  " UR                  S9nX l
        U R                  R                  b  U R                  R                  OSU l        U R                  5         g )N)r7   )r;   r<   r   from_configr>   vision_towerr5   multi_modal_projectortext_config
vocab_sizerp   r7   pad_token_id	post_init)rC   r7   rp   rD   s      r+   r<   PaliGemmaModel.__init__   s     %119M9MN%A&%I" ,,77"..f6H6HI,8<8P8P8\DKK44bdr*   c                 6    U R                   R                  5       $ rG   )rp   get_input_embeddingsrC   s    r+   r|   #PaliGemmaModel.get_input_embeddings   s    ""7799r*   c                 :    U R                   R                  U5        g rG   )rp   set_input_embeddingsrC   values     r+   r   #PaliGemmaModel.set_input_embeddings   s    007r*   is_trainingc                    U R                   R                  R                  S:X  a  Ub  SU;   a  U$ g Ub  UOU R                  n[	        U[
        5      n[        R                  " U R                  5      R                  nUc  UnUR                  S S u  pU(       a  UR                  5       nO_[	        U[        5      (       a  UR                  5       nO9[	        U[        R                  5      (       a  UR                  S   O
US   U
-   S-   nUb  UR                  5       S:X  a  U$ [        R                  " X4XR                  UR                   S9nU
S:w  a(  U(       a  [        R"                  " USS	9nOSUS S 2S U
24'   U[        R$                  " XR                   S
9UR'                  SS5      :  -  nUS S S S 2S S 24   R)                  U	SSS5      nUb  UR+                  5       nUR                  S   nU(       ae  Uc  [-        S5      eUS S 2S S 2S S 2S U24   R/                  US S 2S S S S 24   R1                  UR                   5      S:H  S5      US S 2S S 2S S 2S U24'   US S 2S S 2S S 2S U24   US S 2S S S S 24   R1                  UR                   5      -   nUS:H  nUS S 2S S 2S S 2S U24   R/                  X5      US S 2S S 2S S 2S U24'   U$ )Nflash_attention_2rT   r   rr   r   r      
fill_valuedtypedevicediagonalr   z/Token type ids must be provided during training)r7   rv   _attn_implementationtrainingrY   r   r&   finfor   minshapeget_max_cache_shaper   Tensordimfullr   triuarangereshapeexpandclone
ValueErrormasked_fillto)rC   attention_masktoken_type_idsr1   cache_positioninput_tensorr   using_static_cache	min_dtypeinputs_lead_dimsequence_lengthtarget_lengthcausal_maskmask_lengthpadding_masks                  r+   _update_causal_mask"PaliGemmaModel._update_causal_mask   s    ;;""77;NN)c^.C%%%0%<k$--'EKK

+//	)L+7+=+=bq+A(+??AM55+??AM nell;; $$R(#A&81<  %.*<*<*>!*C!!jj,**]k]r]r
 a#jjqA36A///0u||M:O:OPSaSiSijlnoSppp!$a"23::?ArSUV%%++-K(..r2K !)$%VWW5@Aq,;,AV5W5c5c"1dD!#34778J8JKqPRS6Aq!\k\12
 'q!Q'<=qRVX\^_O_@`@c@cdodvdv@wwL'1,L1<Q1l{l=R1S1_1_2K1a+-. r*   pixel_valuesc                     U R                  U5      nUR                  nU R                  U5      nX@R                  R                  R
                  S-  -  nU$ )ae  
Obtains image last hidden states from the vision tower and apply multimodal projection.

Args:
    pixel_values (`torch.FloatTensor]` of shape `(batch_size, channels, height, width)`)
       The tensors corresponding to the input images.
Returns:
    image_features (`torch.Tensor`): Image feature tensor of shape `(num_images, image_length, embed_dim)`).
g      ?)rt   last_hidden_stateru   r7   rv   r?   )rC   r   image_outputsselected_image_featurerI   s        r+   get_image_features!PaliGemmaModel.get_image_features   sU     )),7!.!@!@334JK';;+B+B+N+NPS+STr*   	input_idsr   position_idsr1   r   r   inputs_embedslabels	use_cacheoutput_attentionsoutput_hidden_statesreturn_dictkwargsreturnc                 `   USL USL-  (       a  [        S5      eUb  UOU R                  R                  nUb  UOU R                  R                  nUb  UOU R                  R                  nUSL=(       a    U	SLnUbR  U R                  R
                  U R                  :  a.  XR                  R
                  :H  nUR                  5       nSUU'   OUnUc  U R                  5       " U5      nUcE  Ub  UR                  5       OSn[        R                  " UUUR                  S   -   UR                  S9nUc  UR                  S5      S-   nUGbx  U R                  U5      nUcY  XR                  5       " [        R                   " U R                  R
                  [        R"                  UR                  S95      :H  nOQXR                  R
                  :H  R                  S5      nUR%                  U5      R'                  UR                  5      n[)        5       (       ds  UU   R+                  5       UR+                  5       :w  aN  UR-                  SS9R-                  SS9S   n[        S	U S
UR                  S   UR                  S   -   S35      eUR'                  UR                  UR.                  5      nUR1                  UU5      nU R3                  X6XWX5      nU R4                  " SUUUUU
UUSUS.	UD6n[7        UR8                  UR:                  UR<                  UR>                  Ub  WS9$ SS9$ )  
labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
    Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
    config.text_config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
    (masked), the loss is only computed for the tokens with labels in `[0, ..., config.text_config.vocab_size]`.

Example:

```python
>>> from PIL import Image
>>> import requests
>>> from transformers import AutoProcessor, PaliGemmaForConditionalGeneration

>>> model = PaliGemmaForConditionalGeneration.from_pretrained("google/paligemma2-3b-mix-224")
>>> processor = AutoProcessor.from_pretrained("google/paligemma2-3b-mix-224")

>>> prompt = "Where is the cat standing?"
>>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg"
>>> image = Image.open(requests.get(url, stream=True).raw)

>>> inputs = processor(images=image, text=prompt,  return_tensors="pt")

>>> # Generate
>>> generate_ids = model.generate(**inputs,)
>>> processor.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
"Where is the cat standing?\nsnow"
```Nz:You must specify exactly one of input_ids or inputs_embedsr   r   r   )r   r   rr   )r   zVNumber of images does not match number of special image tokens in the input text. Got z image tokens in the text but z tokens from image embeddings.T)	r   r   r1   r   r   r   r   r   r   )r   r1   r2   r3   r   r    ) r   r7   r   r   use_return_dictimage_token_idrw   r   r|   get_seq_lengthr&   r   r   r   	unsqueezer   tensorlong	expand_asr   r   numelsumr   masked_scatterr   rp   r   r   r1   r2   r3   )rC   r   r   r   r   r1   r   r   r   r   r   r   r   r   r   r   special_image_maskllm_input_idspast_seen_tokensrI   image_tokens_in_textr   outputss                          r+   rJ   PaliGemmaModel.forward  sh   ^ -t";<YZZ1B1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]$D0GV45G  T[[%?%?4??%R!*kk.H.H!H%OO-M01M,-%M  557FM!CRC^==?de"\\ "2]5H5H5K"KTaThThN )33A6:L #!44\BN %26O6O6QLL!;!;5::VcVjVjk7 &" '0;;3M3M&M%X%XY[%\"%7%A%A-%P%S%STaThTh%i"+---@R2S2Y2Y2[_m_s_s_u2u(:'?'?A'?'F'J'Jq'J'QRS'T$ /00N~OcOcdeOfiwi}i}~  jA  PA  OB B44 
 ,..}/C/C]EXEXYN)889K^\M..O]
 %% 
&%+'/!5)
 
 ,%77#33!//))2>2J
 	

 QU
 	
r*   )rp   ru   rx   rt   rw   )NNNNN)NNNNNNNNNNNNN)r!   r"   r#   r$   _checkpoint_conversion_mappingr   r<   r|   r   r   boolr   r&   r'   r   r   r   
LongTensorr   r   r   r
   r   r   r   r   rJ   r)   rL   rM   s   @r+   rn   rn      s    '=>N%O"
 
:8 &*B d^BHu/@/@    '+*.1537KO595959-1$(,0/3&*x
##x
 ''x
 !.	x

 u//0x
 "%U->->(?(F"GHx
 !!1!12x
 !!1!12x
   1 12x
 ))*x
 D>x
 $D>x
 'tnx
 d^x
 -.x
  
u22	3!x
  x
r*   rn   c                       \ rS rSrSrg)KwargsForCausalLMi  r    N)r!   r"   r#   r$   r)   r    r*   r+   r   r     s    3r*   r   z|
    The Base Paligemma model which consists of a vision backbone and a language model without language modeling head.,
    c            %         ^  \ rS rSrSSSSS.rS/rS\4U 4S	 jjrS
 rS r	S r
S r\S 5       r\S 5       r\S 5       r\\              S)S\R&                  S\R(                  S\\R,                     S\\R&                     S\\\\R(                     \4      S\\R&                     S\\R&                     S\\R(                     S\\R&                     S\\   S\\   S\\   S\\   S\\\R,                  4   S\\   S \\\4   4 S! jj5       5       r           S*U 4S" jjr!\"S\R,                  S#\S$\S%\RF                  S\R,                  S&\4S' j5       r$S(r%U =r&$ )+!PaliGemmaForConditionalGenerationi  zmodel.language_modelzmodel.vision_towerzmodel.multi_modal_projectorlm_head)z^language_model.modelz^vision_towerz^multi_modal_projectorz^language_model.lm_headzlm_head.weightr7   c                    > [         TU ]  U5        [        U5      U l        [        R
                  " UR                  R                  UR                  R                  SS9U l	        U R                  5         g )NFr9   )r;   r<   rn   modelr   r=   rv   r?   rw   r   ry   rB   s     r+   r<   *PaliGemmaForConditionalGeneration.__init__  sS     #F+
yy!3!3!?!?ASASA^A^ejkr*   c                 6    U R                   R                  5       $ rG   )r   r|   r}   s    r+   r|   6PaliGemmaForConditionalGeneration.get_input_embeddings  s    zz..00r*   c                 :    U R                   R                  U5        g rG   )r   r   r   s     r+   r   6PaliGemmaForConditionalGeneration.set_input_embeddings  s    

''.r*   c                     U R                   $ rG   r   r}   s    r+   get_output_embeddings7PaliGemmaForConditionalGeneration.get_output_embeddings  s    ||r*   c                     Xl         g rG   r   )rC   new_embeddingss     r+   set_output_embeddings7PaliGemmaForConditionalGeneration.set_output_embeddings  s    %r*   c                 .    U R                   R                  $ rG   )r   rp   r}   s    r+   rp   0PaliGemmaForConditionalGeneration.language_model  s    zz(((r*   c                 .    U R                   R                  $ rG   )r   rt   r}   s    r+   rt   .PaliGemmaForConditionalGeneration.vision_tower  s    zz&&&r*   c                 .    U R                   R                  $ rG   )r   ru   r}   s    r+   ru   7PaliGemmaForConditionalGeneration.multi_modal_projector  s    zz///r*   r   r   r   r   r1   r   r   r   r   r   r   r   r   logits_to_keepr   r   c                 H   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R                  " SUUUUUUUU
U	UUSUS.UD6nUS   n[        U[        5      (       a  [        U* S5      OUnU R                  USS2USS24   5      nSnU	b3  U R                  " SUXR                   R                  R                  S.UD6n[        UUUR                  UR                  UR                  UR                   S9$ )r   NT)r   r   r   r   r   r1   r   r   r   r   r   r   r   r   )r0   r   rw   )r/   r0   r1   r2   r3   r   r    )r7   r   r   r   r   rY   intslicer   loss_functionrv   rw   r-   r1   r2   r3   r   )rC   r   r   r   r   r1   r   r   r   r   r   r   r   r   r   r   r   r2   slice_indicesr0   r/   s                        r+   rJ   )PaliGemmaForConditionalGeneration.forward  sP   ^ 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]** 
%))%+'/!5)
 
"  
8B>SV8W8W~ot4]kmA}a,?@A%% f9P9P9[9[_eD /#33!//)) ' ; ;
 	
r*   c                 @  > [         TU ]  " U4UUUUUU	U
US.UD6nUR                  S5      b  US==   S-  ss'   US   S:X  a  XmS'   US L=(       a    US LnUS   S:X  a>  [        U[        5      (       a)  Ub  UOUnU R
                  R                  XxX$X5      nUUS'   U$ )N)r1   r   r   r   r   r   r   r   r   r   r   r   r   )r;   prepare_inputs_for_generationgetrY   r   r   r   )rC   r   r1   r   r   r   r   r   r   r   r   r   r   model_inputsr   r   r   rD   s                    r+   r   ?PaliGemmaForConditionalGeneration.prepare_inputs_for_generation
  s      w<
+')%)))
 
 N+7(A-( !!+7($D0GV45G!!j+&N&N,9,E=9L**88Q]K .9L)*r*   r   r   r   
batch_sizec                    U b  U R                  5       S:X  a  U nU$ [        R                  " U5      R                  n[        R                  " X4XUR
                  S9nUS:w  a  [        R                  " USS9nU[        R                  " X$R
                  S9UR                  SS5      :  -  nUSSSS2SS24   R                  USSS5      nU b  UR                  5       nU R                  S   n	USS2SS2SS2SU	24   U SS2SSSS24   R                  UR
                  5      -   n
U
S:H  n
USS2SS2SS2SU	24   R                  X5      USS2SS2SS2SU	24'   U$ )	a  
Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape
`(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing.

Args:
    attention_mask (`torch.Tensor`):
        A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape
        `(batch_size, 1, query_length, key_value_length)`.
    sequence_length (`int`):
        The sequence length being processed.
    target_length (`int`):
        The target length: when generating with static cache, the mask should be as long as the static cache,
        to account for the 0 padding, the part of the cache that is not filled yet.
    dtype (`torch.dtype`):
        The dtype to use for the 4D attention mask.
    cache_position (`torch.Tensor`):
        Indices depicting the position of the input sequence tokens in the sequence.
    batch_size (`torch.Tensor`):
        Batch size.
Nr   r   r   r   r   rr   r   )r   r&   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   s              r+   5_prepare_4d_causal_attention_mask_with_cache_positionWPaliGemmaForConditionalGeneration._prepare_4d_causal_attention_mask_with_cache_position8  s}   > %.*<*<*>!*C(K* ' E*..I** 0Y\j\q\qK !##jjqA5<<>S>STWeWmWmnprsWtttK%dD!Q&67>>z1bRTUK))//1,2226*1aL[L+@ANSTVZ\`bcScDdDgDg&&E    ,q05@Aq,;,AV5W5c5c 6Aq!\k\12 r*   )r   r   )NNNNNNNNNNNNNr   )
NNNNNNNTNN)'r!   r"   r#   r$   r   _tied_weights_keysr   r<   r|   r   r   r   propertyrp   rt   ru   r   r   r&   r   r'   r   r   r   r   r
   r   r   r   r   r   r-   rJ   r   staticmethodr   r   r)   rL   rM   s   @r+   r   r     s    "8-"?#,	&" ++ 1/& ) ) ' ' 0 0  '+*.1537KO595959-1$(,0/3&*34V
##V
 ''V
 !.	V

 u//0V
 "%U->->(?(F"GHV
 !!1!12V
 !!1!12V
   1 12V
 ))*V
 D>V
 $D>V
 'tnV
 d^V
 c5<</0V
  *+!V
" 
u55	6#V
  V
v ,\ 444 4 {{	4
 4 4 4r*   r   )r   rO   rn   )0r%   dataclassesr   typingr   r   r   r   r&   torch.utils.checkpointr   cache_utilsr
   r   r   
generationr   modeling_flash_attention_utilsr   modeling_outputsr   modeling_utilsr   processing_utilsr   utilsr   r   r   r   r   r   autor   configuration_paligemmar   
get_loggerr!   loggerr   r-   Moduler5   rO   rn   r   r   __all__r    r*   r+   <module>r     s    ! / /    : : ) B 7 - & q q  4 
		H	% <#: < <@ $<k $< $<N299  ) ) )0 
e
- e

e
P ?,j > 
f(@/ f
fR ^r*   