
    fThe                        S r SSKrSSKJr  SSKJrJrJr  SSKr	SSK
Jr  SSKJrJr  SSKJrJr  SS	KJr  SS
KJrJr  SSKJr  \R2                  " \5      r\ " S S\5      5       r " S S\	R:                  R<                  R>                  5      r  " S S\	R:                  R<                  R>                  5      r! " S S\	R:                  R<                  R>                  5      r" " S S\	R:                  R<                  R>                  5      r# " S S\	R:                  R<                  R>                  5      r$ " S S\5      r%g)zOTF IdeficsVision model: a copy of CLIPVisionModel using a simpler config object    N)	dataclass)OptionalTupleUnion   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPooling)TFPreTrainedModel
shape_list)flatten)ModelOutputlogging   )IdeficsVisionConfigc                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                        \	S'   Sr\\\R                        \	S'   Srg)	TFIdeficsVisionModelOutput"   am  
Base class for vision model's outputs that also contains image embeddings of the pooling of the last hidden states.

Args:
    image_embeds (`tf.Tensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`):
        The image embeddings obtained by applying the projection layer to the pooler_output.
    last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
        one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

        Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
    attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
        Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
        sequence_length)`.

        Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
        heads.
Nimage_embedslast_hidden_statehidden_states
attentions )__name__
__module____qualname____firstlineno____doc__r   r   tfTensor__annotations__r   r   r   r   __static_attributes__r       ]/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/idefics/vision_tf.pyr   r   "   s`    * )-L(299%,-1x		*104M8E")),-4-1Jryy)*1r#   r   c                      ^  \ rS rSrS\4U 4S jjrS\R                  S\S\S\R                  4S jr	SS	\R                  S
\
S\R                  4S jjrSS jrSrU =r$ )TFIdeficsVisionEmbeddings?   configc           
      &  > [         TU ]  " S
0 UD6  Xl        UR                  U l        UR
                  U l        UR                  U l        [        R                  R                  R                  U R                  U R                  U R                  SSSSS9U l        U R
                  U R                  -  S-  U l        U R                  S-   U l        [        R                  R                  R                  U R                  U R                  SS	9U l        g )NFvalidchannels_lastpatch_embedding)filterskernel_sizestridesuse_biaspaddingdata_formatname   r   position_embeddingr3   r   )super__init__r(   hidden_size	embed_dim
image_size
patch_sizer   keraslayersConv2Dr,   num_patchesnum_positions	Embeddingr5   selfr(   kwargs	__class__s      r$   r8   "TFIdeficsVisionEmbeddings.__init__@   s    "6"++ ++ ++!xx55NNOO'"  6  
 !OOt>1D!--1"$((//";";5I #< #
r#   
embeddingsheightwidthreturnc           	      ~   [        U5      S   S-
  nU R                  U R                  5      n[        U5      S   S-
  nXF:X  a  X#:X  a  U$ US S 2S4   nUS S 2SS 24   n[        U5      S   n	X R                  R                  -  n
X0R                  R                  -  nU
S-   US-   p[
        R                  " [        U5      5      n[        R                  " US[        U5      [        U5      U	45      nX-  nX-  n[        R                  " [        R                  " U5      S   [        R                  5      n[        R                  " [        R                  " U5      S   [        R                  5      n[        R                  " X-  [        R                  5      n[        R                  " UU-  [        R                  5      n[        R                  R!                  UUU/[        R                  R"                  R$                  S9n[        U
5      [        U5      S   :w  d  [        U5      [        U5      S   :w  a@  ['        S	[        U
5      [        U5      4 S
[        U5      S   [        U5      S   4 S35      e[        R                  " USSU	45      n[        R(                  " U[        R*                  S S 24   U4SS9$ )Nr   r   g?r4   )sizemethodzNumber of patches for images (z/) don't match the shape of position embedding ()axis)r   r5   position_idsr(   r<   mathsqrtfloatr   reshapeintcastshapefloat32int32imageresizeResizeMethodBICUBIC
ValueErrorconcatnewaxis)rD   rH   rI   rJ   r@   	pos_embedrA   class_pos_embedpatch_pos_embedr:   num_h_patchesnum_w_patchessqrt_num_positionsscale_heightscale_widthoriginal_heightoriginal_width
new_height	new_widths                      r$   interpolate_pos_encoding2TFIdeficsVisionEmbeddings.interpolate_pos_encodingX   se    ,Q/!3++D,=,=>	"9-a014'FO#AqD/#AqrE*z*2.	++"8"88!7!77'4s':MC<O}!YYu]';<**_q#>P:QSVWiSjlu6vw$9#8''"((?";A">

K/!:1!=rzzJWW_;RXXF
GGN[8"((C	((//:y"9"((BWBWB_B_ * 

 *_"=b"AA=!Z%@%DD0]1CSEW1W0X Y00:?0KB0OQ[\kQlmoQp0p/qqrt  **_q"i6HIyy/"**a-8/JQRSSr#   pixel_valuesrr   c                    [        U[        5      (       a  US   n[        R                  " USS9n[	        U5      u  p4pVU(       dJ  X@R
                  :w  d  XPR
                  :w  a,  [        SU SU SU R
                   SU R
                   S3	5      eU R                  U5      n[        USS	5      n[        R                  " U R                  [        R                  [        R                  S S 24   USU R                  /5      n[        R                  " X/SS
9n	U(       a  XR                  XU5      -   n	U	$ XR                  U R                   5      -   n	U	$ )Nrt   )r   r4   r   r   permzInput image size (*z) doesn't match model (z8). You should try to set `interpolate_pos_encoding=True`r   r4   rS   )
isinstancedictr   	transposer   r;   rc   r,   r   broadcast_toclass_embeddingre   r:   rd   rr   r5   rU   )
rD   rt   rr   
batch_sizerI   rJ   num_channelspatch_embedsclass_embedsrH   s
             r$   callTFIdeficsVisionEmbeddings.call   s@   
 lD))'7L||L|D2<\2J/
E'(E__,D (% 9)4??*;;su 
 ++L9 |Q2  RZZ!:;j!T^^=\
 YY;!D
 $#&C&CJX]&^^J  $&=&=d>O>O&PPJr#   c                    U R                   (       a  g SU l         [        R                  " U R                  SS9[        R                  S S 24   U l        U R                  U R                  4SS9U l        [        U SS 5      bf  [        R                  " U R                  R                  5         U R                  R                  S S S U R                  R                  /5        S S S 5        [        U SS 5      bO  [        R                  " U R                   R                  5         U R                   R                  S 5        S S S 5        g g ! , (       d  f       Nl= f! , (       d  f       g = f)NTzself.position_idsr6   r}   )r\   r3   r,   r5   )builtr   rangerA   re   rU   
add_weightr:   r}   getattr
name_scoper,   r3   buildr(   r   r5   rD   input_shapes     r$   r   TFIdeficsVisionEmbeddings.build   s   ::
HHT%7%7>QRSUS]S]_`S`a#dnn5FM^_4*D1=t33889$$**D$dkk>V>V+WX :4-t4@t66;;<''--d3 =< A :9 =<s   +4EE
E
E$)r   r}   r(   r:   r;   r@   rA   r,   r<   r5   rU   FN)r   r   r   r   r   r8   r   r    rZ   rr   boolr   r   r"   __classcell__rF   s   @r$   r&   r&   ?   ss    
2 
0%T299 %Tc %TRU %TZ\ZcZc %TN! !d !WYW`W` !F4 4r#   r&   c                   0  ^  \ rS rSrSrU 4S jrS\R                  S\S\4S jr	   SS\R                  S	\
\R                     S
\
\R                     S\
\   S\\R                  \
\R                     \
\\R                        4   4
S jjrSS jrSrU =r$ )TFIdeficsVisionAttention   z=Multi-headed attention from 'Attention Is All You Need' paperc                 N  > [         TU ]  " S
0 UD6  Xl        UR                  U l        UR
                  U l        U R                  U R                  -  U l        U R                  U R                  -  U R                  :w  a&  [        SU R                   SU R                   S35      eU R                  S-  U l	        UR                  U l        [        R                  R                  R                  U R                  SS9U l        [        R                  R                  R                  U R                  SS9U l        [        R                  R                  R                  U R                  SS9U l        [        R                  R                  R                  U R                  S	S9U l        g )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      k_projr6   v_projq_projout_projr   )r7   r8   r(   r9   r:   num_attention_heads	num_headshead_dimrc   scaleattention_dropoutdropoutr   r=   r>   Denser   r   r   r   rC   s      r$   r8   !TFIdeficsVisionAttention.__init__   s:   "6"++33$..8==4>>)T^^;MdnnM] ^NN#2'  ]]D(
//hhoo++DNN+Jhhoo++DNN+Jhhoo++DNN+J--dnn:-Nr#   tensorseq_lenbszc           	          [         R                  " [         R                  " XX R                  U R                  45      / SQS9$ )Nr   r4   r   r   rv   )r   r{   rY   r   r   )rD   r   r   r   s       r$   _shapeTFIdeficsVisionAttention._shape   s,    ||BJJvWnndmm/\]dpqqr#   r   attention_maskcausal_attention_maskoutput_attentionsrK   c           
      &   [        U5      u  pVnU R                  U5      U R                  -  nU R                  U R	                  U5      SU5      n	U R                  U R                  U5      SU5      n
XPR                  -  SU R                  4n[        R                  " U R                  XU5      U5      n[        R                  " X5      n	[        R                  " X5      n
[        U	5      S   n[        R                  R                  XSS9n[        R                  R                  [        R                  " U5      XPR                  -  Xl/SXPR                  -  Xl/ S[        R                  " U5       3S9  Ubz  [        U5      USXl/:w  a  [        S	USXl4 S[        U5       35      e[        R                  " XU R                  Xl45      U-   n[        R                  " XU R                  -  Xl45      nUbz  [        U5      USXl/:w  a  [        S	USXl4 S[        U5       35      e[        R                  " XU R                  Xl45      U-   n[        R                  " XU R                  -  Xl45      n[        R                   R#                  USS
9nU(       aI  [        R                  " XU R                  Xl45      n[        R                  " XU R                  -  Xl45      nOSn[        R                   R%                  XR$                  S9n[        R                  R                  X5      n[        R                  R                  [        R                  " U5      XPR                  -  X`R                  /SXPR                  -  X`R                  / S[        R                  " U5       3S9  [        R                  " UXPR                  X`R                  45      n[        R&                  " U/ SQS9n[        R                  " UXVU45      nU R)                  U5      nUU4$ )z#Input shape: Batch x Time x ChannelrM   r   T)transpose_bz$Attention weights should be of size z	, but is )messageNz!Attention mask should be of size rS   )rater   rv   )r   r   r   r   r   r   r   r   r   rY   linalgmatmul	debuggingassert_equalr\   rc   nnsoftmaxr   r{   r   )rD   r   r   r   r   r   tgt_lenr:   query_states
key_statesvalue_states
proj_shapesrc_lenattn_weightsattn_weights_reshaped
attn_probsattn_outputs                    r$   r   TFIdeficsVisionAttention.call   s    #-]";i {{=1DJJ>[[]!;RE
{{4;;}#=r3GNN*B>
zz$++lS"I:VZZ
7
zz,;Z(+yy''d'S
!!HH\">>!74:C..<PRY;c:ddmnpnvnv  xD  oE  nF  G 	" 	
 !,/0S!W4NN 7a8R7S T"#89:<  ::l$..'4[\_ttL::l4>>5I74\]L%.)c1g-GG 7a8R7SS\]ghv]w\xy  ::l$..'4[\_mmL::l4>>5I74\]Luu}}\};
 %'JJ|4>>SZ=d$e!::&;DNN>RT[=efL$(!UU]]<ll]C
ii&&z@
!!HH[!>>!7MM::C..<PRY[h[h;i:jjstvt|t|  ~I  uJ  tK  L 	" 	
 jjsNNG]].[\ll;\BjjsY.GHmmK0111r#   c                 T   U R                   (       a  g SU l         [        U SS 5      bd  [        R                  " U R                  R
                  5         U R                  R                  U R                  U R                  45        S S S 5        [        U SS 5      bd  [        R                  " U R                  R
                  5         U R                  R                  U R                  U R                  45        S S S 5        [        U SS 5      bd  [        R                  " U R                  R
                  5         U R                  R                  U R                  U R                  45        S S S 5        [        U SS 5      be  [        R                  " U R                  R
                  5         U R                  R                  U R                  U R                  45        S S S 5        g g ! , (       d  f       GNg= f! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       g = f)NTr   r   r   r   )r   r   r   r   r   r3   r   r:   r   r   r   r   s     r$   r   TFIdeficsVisionAttention.build  sc   ::
44(4t{{//0!!4>>4>>"BC 144(4t{{//0!!4>>4>>"BC 144(4t{{//0!!4>>4>>"BC 14T*6t}}112##T^^T^^$DE 32 7 10 10 10 32s0   2G$2G662H(2H$
G36
H
H
H')r   r(   r   r:   r   r   r   r   r   r   r   )NNFr   )r   r   r   r   r   r8   r   r    rZ   r   r   r   r   r   r   r"   r   r   s   @r$   r   r      s    GO&rRYY r r3 r /359,1L2yyL2 !+L2  (		2	L2
 $D>L2 
ryy(299-xbii8H/II	JL2\F Fr#   r   c                   l   ^  \ rS rSrU 4S jrS\R                  S\R                  4S jrSS jrSr	U =r
$ )	TFIdeficsVisionMLPi)  c                 B  > [         TU ]  " S0 UD6  Xl        [        UR                  5      U l        [        R                  R                  R                  UR                  SS9U l        [        R                  R                  R                  UR                  SS9U l        g )Nfc1r6   fc2r   )r7   r8   r(   r   
hidden_actactivation_fnr   r=   r>   r   intermediate_sizer   r9   r   rC   s      r$   r8   TFIdeficsVisionMLP.__init__*  su    "6".v/@/@A88??(()A)A(N88??((););%(Hr#   r   rK   c                 l    U R                  U5      nU R                  U5      nU R                  U5      nU$ r   )r   r   r   )rD   r   s     r$   r   TFIdeficsVisionMLP.call1  s4    /**=9/r#   c                 <   U R                   (       a  g SU l         [        U SS 5      bb  [        R                  " U R                  R
                  5         U R                  R                  U R                  R                  5        S S S 5        [        U SS 5      bc  [        R                  " U R                  R
                  5         U R                  R                  U R                  R                  5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       g = f)NTr   r   )r   r   r   r   r   r3   r   r(   r9   r   r   r   s     r$   r   TFIdeficsVisionMLP.build7  s    ::
4%1txx}}-t{{667 .4%1txx}}-t{{<<= .- 2 .- .-s   0C<0D<
D

D)r   r   r(   r   r   r   )r   r   r   r   r8   r   r    r   r   r"   r   r   s   @r$   r   r   )  s.    I")) 		 	> 	>r#   r   c                      ^  \ rS rSrS\4U 4S jjr SS\R                  S\R                  S\R                  S\\	   S\
\R                     4
S	 jjrSS
 jrSrU =r$ )TFIdeficsVisionEncoderLayeriC  r(   c                 `  > [         TU ]  " S0 UD6  UR                  U l        [	        USS9U l        [        R                  R                  R                  UR                  SS9U l        [        USS9U l        [        R                  R                  R                  UR                  SS9U l        g )N	self_attnr6   layer_norm1epsilonr3   mlplayer_norm2r   )r7   r8   r9   r:   r   r   r   r=   r>   LayerNormalizationlayer_norm_epsr   r   r   r   rC   s      r$   r8   $TFIdeficsVisionEncoderLayer.__init__D  s    "6"++1&{K88??==fF[F[bo=p%f5988??==fF[F[bo=pr#   r   r   r   r   rK   c                     UnU R                  U5      nU R                  UUUUS9u  pXQ-   nUnU R                  U5      nU R                  U5      nXQ-   nU4nU(       a  Xv4-  nU$ )a  
Args:
    hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
    attention_mask (`tf.Tensor`): attention mask of size
        `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
        `(config.encoder_attention_heads,)`.
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
)r   r   r   r   )r   r   r   r   )rD   r   r   r   r   residualr   outputss           r$   r    TFIdeficsVisionEncoderLayer.callL  s    " !((7&*nn')"7/	 '5 '
# !0 ((7/ 0 "&Gr#   c                     U R                   (       a  g SU l         [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      b\  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        g g ! , (       d  f       Ny= f! , (       d  f       g = f)NTr   r   )	r   r   r   r   r   r3   r   r:   r   r   s     r$   r   !TFIdeficsVisionEncoderLayer.buildt  s    ::
4-9t//445  &&dDNN'CD 64-9t//445  &&dDNN'CD 65 : 65 65s   )C.;)C?.
C<?
D)r   r:   r   r   r   r   r   r   )r   r   r   r   r   r8   r   r    r   r   r   r   r   r"   r   r   s   @r$   r   r   C  sr    q2 q -2&yy& 		&  "yy	&
 $D>& 
ryy	&P	E 	Er#   r   c                      ^  \ rS rSrSrS\4U 4S jjr      SS\\R                     S\\R                     S\\
   S\\
   S	\\
   S
\\
   S\\\4   4S jjrSS jrSrU =r$ )TFIdeficsVisionEncoderi  z
Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
[`TFIdeficsVisionEncoderLayer`].

Args:
    config: IdeficsVisionConfig
r(   c                    > [         TU ]  " S0 UD6  Xl        [        UR                  5       Vs/ s H  n[        USU 3S9PM     snU l        SU l        g s  snf )Nzlayers.r6   Fr   )r7   r8   r(   r   num_hidden_layersr   r>   gradient_checkpointing)rD   r(   rE   irF   s       r$   r8   TFIdeficsVisionEncoder.__init__  s]    "6"MRSYSkSkMl
Ml'wqc]CMl
 ',#
s   Ar   r   r   output_hidden_statesreturn_dicttrainingrK   c                 F  ^ Tb  TOU R                   R                  mUb  UOU R                   R                  nUb  UOU R                   R                  nU(       a  SOSnT(       a  SOSn	Un
[	        U R
                  5       Hn  u  pU(       a  X4-   nU R                  (       a-  U(       a&  U4S jn[        R                  " U" U5      U
UU5      nO	U" U
UUTS9nUS   n
T(       d  Mf  XS   4-   n	Mp     U(       a  X4-   nU(       d  [        S XU	4 5       5      $ [        XU	S9$ )	a  
Args:
    inputs_embeds (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
        This is useful if you want more control over how to convert `input_ids` indices into associated vectors
        than the model's internal embedding lookup matrix.
    attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
    causal_attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
        Causal mask for the text model. Mask values selected in `[0, 1]`:

        - 1 for tokens that are **not masked**,
        - 0 for tokens that are **masked**.

        [What are attention masks?](../glossary#attention-mask)
    output_attentions (`bool`, *optional*):
        Whether or not to return the attentions tensors of all attention layers. See `attentions` under
        returned tensors for more detail.
    output_hidden_states (`bool`, *optional*):
        Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
        for more detail.
    return_dict (`bool`, *optional*):
        Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
Nr   c                    >^  U U4S jnU$ )Nc                     > T" / U QTP76 $ r   r   )inputsmoduler   s    r$   custom_forwardRTFIdeficsVisionEncoder.call.<locals>.create_custom_forward.<locals>.custom_forward  s    %AvA/@AAr#   r   )r   r   r   s   ` r$   create_custom_forward:TFIdeficsVisionEncoder.call.<locals>.create_custom_forward  s    B *)r#   )r   r   r   c              3   .   #    U  H  oc  M  Uv   M     g 7fr   r   ).0vs     r$   	<genexpr>.TFIdeficsVisionEncoder.call.<locals>.<genexpr>  s     e$Sq$Ss   	)r   r   r   )r(   r   r   use_return_dict	enumerater>   r   r   recompute_gradtupler	   )rD   inputs_embedsr   r   r   r   r   r   encoder_statesall_attentionsr   idxencoder_layerr   layer_outputss       `          r$   r   TFIdeficsVisionEncoder.call  s8   N 2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]30d%"+DKK"8C#!/2B!B**x* !# 1 1)-8!")	! !.!")&7	! *!,M  !/3C2E!E9 #9<  +.>>Ne]N$Seee +Vd
 	
r#   c                    U R                   (       a  g SU l         [        U SS 5      bN  U R                   H=  n[        R                  " UR
                  5         UR                  S 5        S S S 5        M?     g g ! , (       d  f       MR  = f)NTr>   )r   r   r>   r   r   r3   r   )rD   r   layers      r$   r   TFIdeficsVisionEncoder.build  s`    ::
44(4]]5::.KK% /. % 5..s   A77
B	)r   r(   r   r>   )NNNNNNr   )r   r   r   r   r   r   r8   r   r   r    r   r   r   r	   r   r   r"   r   r   s   @r$   r   r     s    ,2 , /359,0/3&*#'V
 !+V
  (		2	V

 $D>V
 'tnV
 d^V
 4.V
 
u''	(V
p& &r#   r   c                      ^  \ rS rSrS\4U 4S jjr      SS\\R                     S\\	   S\\	   S\\	   S\\	   S	\\	   S
\
\\4   4S jjrSS jrSrU =r$ )TFIdeficsVisionTransformeri  r(   c                 n  > [         TU ]  " U40 UD6  Xl        UR                  U l        [        USS9U l        [        R                  R                  R                  UR                  SS9U l        [        USS9U l        [        R                  R                  R                  UR                  SS9U l        g )NrH   r6   pre_layrnormr   encoderpost_layernorm)r7   r8   r(   r9   r:   r&   rH   r   r=   r>   r   r   r  r   r  r  rC   s      r$   r8   #TFIdeficsVisionTransformer.__init__  s    *6*++3FNHHOO>>vG\G\cq>r-f9E hhoo@@I^I^eu@vr#   rt   r   r   rr   r   r   rK   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nUc  [	        S5      eU R                  XS9nU R                  U5      nU R                  UUUUUS9nUS   n	U	SS2SSS24   n
U R                  U
5      n
U(       d	  X4USS -   $ [        U	U
UR                  UR                  S9$ )z
Returns:

Nz You have to specify pixel_values)rr   )r   r   r   r   r   r   r   )r   pooler_outputr   r   )r(   r   r   r   rc   rH   r  r  r  r
   r   r   )rD   rt   r   r   rr   r   r   r   encoder_outputsr   pooled_outputs              r$   r   TFIdeficsVisionTransformer.call  s    2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B]?@@h))-8,,'/!5# ' 
 ,A.)!Q'2++M:%58KKK+/')77&11	
 	
r#   c                    U R                   (       a  g SU l         [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S S U R                  /5        S S S 5        [        U SS 5      bN  [        R                  " U R                  R
                  5         U R                  R                  S 5        S S S 5        [        U SS 5      b[  [        R                  " U R                  R
                  5         U R                  R                  S U R                  /5        S S S 5        g g ! , (       d  f       GN>= f! , (       d  f       N= f! , (       d  f       N= f! , (       d  f       g = f)NTrH   r  r  r  )r   r   r   r   rH   r3   r   r  r:   r  r  r   s     r$   r    TFIdeficsVisionTransformer.build-  sL   ::
4t,8t334%%d+ 54.:t00556!!''tT^^(DE 74D)5t||001""4( 24)40<t22778##))4*@A 98 = 54 76 21 98s0   F%.)F7G3(G%
F47
G
G
G')r   r(   r:   rH   r  r  r  )NNNFNFr   )r   r   r   r   r   r8   r   r   r    r   r   r   r
   r   r   r"   r   r   s   @r$   r
  r
    s    w2 w -1,0/338&*#(,
ryy),
 $D>,
 'tn	,

 #+4.,
 d^,
 4.,
 
u22	3,
\B Br#   r
  )&r   rV   dataclassesr   typingr   r   r   
tensorflowr   activations_tfr   modeling_tf_outputsr	   r
   modeling_tf_utilsr   r   tf_utilsr   utilsr   r   configuration_ideficsr   
get_loggerr   loggerr   r=   r>   Layerr&   r   r   r   r   r
  r   r#   r$   <module>r#     s    V  ! ) )  / R >  ) 6 
		H	% 2 2 28n4 5 5 n4bvFrxx44 vFr>.. >4:E"((//"7"7 :Ezp&RXX__22 p&fIB!2 IBr#   