
    fTh~g              	          S r SSKrSSKJr  SSKJrJrJr  SSK	r	SSK
r	SSK	Jr  SSKJrJrJr  SSKJrJr  SS	KJrJrJr  SS
KJrJr  SSKJr  \R8                  " \5      r\ " S S\5      5       rS9S\	R@                  S\!S\"S\	R@                  4S jjr# " S S\RH                  5      r% " S S\RH                  5      r& " S S\RH                  5      r' " S S\RH                  5      r( " S S\RH                  5      r) " S S\RH                  5      r* " S  S!\RH                  5      r+ " S" S#\RH                  5      r, " S$ S%\RH                  5      r- " S& S'\RH                  5      r. " S( S)\RH                  5      r/ " S* S+\RH                  5      r0 " S, S-\RH                  5      r1 " S. S/\RH                  5      r2\ " S0 S1\5      5       r3\ " S2 S3\35      5       r4\" S4S59 " S6 S7\35      5       r5/ S8Qr6g):zPyTorch CvT model.    N)	dataclass)OptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )$ImageClassifierOutputWithNoAttentionModelOutput)PreTrainedModel find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	CvtConfigc                       \ rS rSr% SrSr\\R                     \	S'   Sr
\\R                     \	S'   Sr\\\R                  S4      \	S'   Srg)	BaseModelOutputWithCLSToken#   aS  
Base class for model's outputs, with potential hidden states and attentions.

Args:
    last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
        Sequence of hidden-states at the output of the last layer of the model.
    cls_token_value (`torch.FloatTensor` of shape `(batch_size, 1, hidden_size)`):
        Classification token at the output of the last layer of the model.
    hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
        Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of
        shape `(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer
        plus the initial embedding outputs.
Nlast_hidden_statecls_token_value.hidden_states )__name__
__module____qualname____firstlineno____doc__r   r   torchFloatTensor__annotations__r   r   r   __static_attributes__r       \/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/cvt/modeling_cvt.pyr   r   #   sS     6:x 1 12937OXe//07=AM8E%"3"3S"89:Ar%   r   input	drop_probtrainingreturnc                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
        r   r   )r   )dtypedevice)shapendimr!   randr-   r.   floor_div)r'   r(   r)   	keep_probr/   random_tensoroutputs          r&   	drop_pathr7   9   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FMr%   c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )CvtDropPathN   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr(   r*   c                 .   > [         TU ]  5         Xl        g N)super__init__r(   )selfr(   	__class__s     r&   r>   CvtDropPath.__init__Q   s    "r%   r   c                 B    [        XR                  U R                  5      $ r<   )r7   r(   r)   )r?   r   s     r&   forwardCvtDropPath.forwardU   s    FFr%   c                 8    SR                  U R                  5      $ )Nzp={})formatr(   )r?   s    r&   
extra_reprCvtDropPath.extra_reprX   s    }}T^^,,r%   r(   r<   )r   r   r   r   r    r   floatr>   r!   TensorrC   strrG   r$   __classcell__r@   s   @r&   r9   r9   N   sQ    b#(5/ #T # #GU\\ Gell G-C - -r%   r9   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )CvtEmbeddings\   z
Construct the CvT embeddings.
c                 x   > [         TU ]  5         [        XX4US9U l        [        R
                  " U5      U l        g )N)
patch_sizenum_channels	embed_dimstridepadding)r=   r>   CvtConvEmbeddingsconvolution_embeddingsr   Dropoutdropout)r?   rS   rT   rU   rV   rW   dropout_rater@   s          r&   r>   CvtEmbeddings.__init__a   s5    &7!	jq'
# zz,/r%   c                 J    U R                  U5      nU R                  U5      nU$ r<   rY   r[   )r?   pixel_valueshidden_states      r&   rC   CvtEmbeddings.forwardh   s&    22<@||L1r%   r_   	r   r   r   r   r    r>   rC   r$   rM   rN   s   @r&   rP   rP   \   s    0 r%   rP   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )rX   n   z
Image to Conv Embedding.
c                    > [         TU ]  5         [        U[        R                  R
                  5      (       a  UOX4nXl        [        R                  " X#XUS9U l	        [        R                  " U5      U l        g )N)kernel_sizerV   rW   )r=   r>   
isinstancecollectionsabcIterablerS   r   Conv2d
projection	LayerNormnormalization)r?   rS   rT   rU   rV   rW   r@   s         r&   r>   CvtConvEmbeddings.__init__s   sZ    #-j+//:R:R#S#SZZdYq
$))Llst\\)4r%   c                    U R                  U5      nUR                  u  p#pEXE-  nUR                  X#U5      R                  SSS5      nU R                  (       a  U R	                  U5      nUR                  SSS5      R                  X#XE5      nU$ Nr      r   )rm   r/   viewpermutero   )r?   r`   
batch_sizerT   heightwidthhidden_sizes          r&   rC   CvtConvEmbeddings.forwardz   s    |42>2D2D/
&n#((;OWWXY[\^_`--l;L#++Aq!499*TZbr%   )ro   rS   rm   rc   rN   s   @r&   rX   rX   n   s    5
 
r%   rX   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CvtSelfAttentionConvProjection   c           
         > [         TU ]  5         [        R                  " UUUUUSUS9U l        [        R
                  " U5      U l        g )NF)rg   rW   rV   biasgroups)r=   r>   r   rl   convolutionBatchNorm2dro   )r?   rU   rg   rW   rV   r@   s        r&   r>   'CvtSelfAttentionConvProjection.__init__   sG    99#
  ^^I6r%   c                 J    U R                  U5      nU R                  U5      nU$ r<   r   ro   r?   ra   s     r&   rC   &CvtSelfAttentionConvProjection.forward   s(    ''5)),7r%   r   r   r   r   r   r>   rC   r$   rM   rN   s   @r&   r|   r|      s    7 r%   r|   c                       \ rS rSrS rSrg) CvtSelfAttentionLinearProjection   c                 r    UR                   u  p#pEXE-  nUR                  X#U5      R                  SSS5      nU$ rr   )r/   rt   ru   )r?   ra   rv   rT   rw   rx   ry   s          r&   rC   (CvtSelfAttentionLinearProjection.forward   sC    2>2D2D/
&n#((;OWWXY[\^_`r%   r   N)r   r   r   r   rC   r$   r   r%   r&   r   r      s    r%   r   c                   2   ^  \ rS rSrSU 4S jjrS rSrU =r$ )CvtSelfAttentionProjection   c                 n   > [         TU ]  5         US:X  a  [        XX45      U l        [	        5       U l        g )Ndw_bn)r=   r>   r|   convolution_projectionr   linear_projection)r?   rU   rg   rW   rV   projection_methodr@   s         r&   r>   #CvtSelfAttentionProjection.__init__   s1    '*Hah*qD'!A!Cr%   c                 J    U R                  U5      nU R                  U5      nU$ r<   r   r   r   s     r&   rC   "CvtSelfAttentionProjection.forward   s(    22<@--l;r%   r   )r   r   rN   s   @r&   r   r      s    D r%   r   c                   :   ^  \ rS rSr SU 4S jjrS rS rSrU =r$ )CvtSelfAttention   c                   > [         TU ]  5         US-  U l        Xl        X l        Xl        [        UUUUUS:X  a  SOUS9U l        [        X#XWUS9U l        [        X#XWUS9U l	        [        R                  " X"U	S9U l        [        R                  " X"U	S9U l        [        R                  " X"U	S9U l        [        R                  " U
5      U l        g )Ng      avglinear)r   )r   )r=   r>   scalewith_cls_tokenrU   	num_headsr   convolution_projection_queryconvolution_projection_keyconvolution_projection_valuer   Linearprojection_queryprojection_keyprojection_valuerZ   r[   )r?   r   rU   rg   	padding_q
padding_kvstride_q	stride_kvqkv_projection_methodqkv_biasattention_drop_rater   kwargsr@   s                r&   r>   CvtSelfAttention.__init__   s     	_
,"",F*?5*HhNc-
) +EJMb+
' -GJMb-
) !#		)X N ii	8L "		)X Nzz"56r%   c                     UR                   u  p#nU R                  U R                  -  nUR                  X#U R                  U5      R	                  SSSS5      $ )Nr   rs   r   r   )r/   rU   r   rt   ru   )r?   ra   rv   ry   _head_dims         r&   "rearrange_for_multi_head_attention3CvtSelfAttention.rearrange_for_multi_head_attention   sR    %1%7%7"
>>T^^3  $..(S[[\]_`bcefggr%   c                 R   U R                   (       a  [        R                  " USX#-  /S5      u  pAUR                  u  pVnUR	                  SSS5      R                  XWX#5      nU R                  U5      nU R                  U5      n	U R                  U5      n
U R                   (       aC  [        R                  " WU	4SS9n	[        R                  " XH4SS9n[        R                  " XJ4SS9n
U R                  U R                  -  nU R                  U R                  U	5      5      n	U R                  U R                  U5      5      nU R                  U R                  U
5      5      n
[        R                   " SX/5      U R"                  -  n[        R$                  R&                  R)                  USS9nU R+                  U5      n[        R                   " SX/5      nUR                  u    pnUR	                  SSSS5      R-                  5       R                  XVU R                  U-  5      nU$ )	Nr   r   rs   dimzbhlk,bhtk->bhltzbhlt,bhtv->bhlvr   )r   r!   splitr/   ru   rt   r   r   r   catrU   r   r   r   r   r   einsumr   r   
functionalsoftmaxr[   
contiguous)r?   ra   rw   rx   	cls_tokenrv   ry   rT   keyqueryvaluer   attention_scoreattention_probscontextr   s                   r&   rC   CvtSelfAttention.forward   s   &+kk,FN@SUV&W#I0<0B0B-
#++Aq!499*TZb--l;11,?11,?IIy%0a8E))Y,!4CIIy0a8E>>T^^3778M8Me8TU55d6I6I#6NO778M8Me8TU,,'85,G$**T((--55o25N,,7,,0?2JK&}}11//!Q1-88:??
Y]YgYgjrYrsr%   )r   r   r   r[   rU   r   r   r   r   r   r   T)	r   r   r   r   r>   r   rC   r$   rM   rN   s   @r&   r   r      s     '7Rh r%   r   c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )CvtSelfOutputi  z
The residual connection is defined in CvtLayer instead of here (as is the case with other models), due to the
layernorm applied before each block.
c                    > [         TU ]  5         [        R                  " X5      U l        [        R
                  " U5      U l        g r<   )r=   r>   r   r   denserZ   r[   )r?   rU   	drop_rater@   s      r&   r>   CvtSelfOutput.__init__  s.    YYy4
zz),r%   c                 J    U R                  U5      nU R                  U5      nU$ r<   r   r[   r?   ra   input_tensors      r&   rC   CvtSelfOutput.forward  s$    zz,/||L1r%   r   rc   rN   s   @r&   r   r     s    
-
 r%   r   c                   :   ^  \ rS rSr SU 4S jjrS rS rSrU =r$ )CvtAttentioni  c                    > [         TU ]  5         [        UUUUUUUUU	U
U5      U l        [	        X+5      U l        [        5       U l        g r<   )r=   r>   r   	attentionr   r6   setpruned_heads)r?   r   rU   rg   r   r   r   r   r   r   r   r   r   r@   s                r&   r>   CvtAttention.__init__  sU     	)!
 $I9Er%   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r   )lenr   r   num_attention_headsattention_head_sizer   r   r   r   r   r6   r   all_head_sizeunion)r?   headsindexs      r&   prune_headsCvtAttention.prune_heads4  s   u:?7>>55t~~7Y7Y[_[l[l

  2$..2F2FN/0B0BEJ1$..2F2FN.t{{/@/@%QO .2^^-O-ORUV[R\-\*'+~~'I'IDNNLnLn'n$ --33E:r%   c                 L    U R                  XU5      nU R                  XA5      nU$ r<   )r   r6   )r?   ra   rw   rx   self_outputattention_outputs         r&   rC   CvtAttention.forwardF  s'    nn\5A;;{Ar%   )r   r6   r   r   )	r   r   r   r   r>   r   rC   r$   rM   rN   s   @r&   r   r     s     "@;$   r%   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CvtIntermediateiL  c                    > [         TU ]  5         [        R                  " U[	        X-  5      5      U l        [        R                  " 5       U l        g r<   )r=   r>   r   r   intr   GELU
activation)r?   rU   	mlp_ratior@   s      r&   r>   CvtIntermediate.__init__M  s5    YYy#i.C*DE
'')r%   c                 J    U R                  U5      nU R                  U5      nU$ r<   )r   r   r   s     r&   rC   CvtIntermediate.forwardR  s$    zz,/|4r%   )r   r   r   rN   s   @r&   r   r   L  s    $
 r%   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )	CvtOutputiX  c                    > [         TU ]  5         [        R                  " [	        X-  5      U5      U l        [        R                  " U5      U l        g r<   )r=   r>   r   r   r   r   rZ   r[   )r?   rU   r   r   r@   s       r&   r>   CvtOutput.__init__Y  s8    YYs9#899E
zz),r%   c                 R    U R                  U5      nU R                  U5      nX-   nU$ r<   r   r   s      r&   rC   CvtOutput.forward^  s,    zz,/||L1#2r%   r   r   rN   s   @r&   r   r   X  s    -
 r%   r   c                   8   ^  \ rS rSrSr SU 4S jjrS rSrU =r$ )CvtLayerie  zZ
CvtLayer composed by attention layers, normalization and multi-layer perceptrons (mlps).
c                 X  > [         TU ]  5         [        UUUUUUUUU	U
UU5      U l        [	        X,5      U l        [        X,U5      U l        US:  a	  [        US9O[        R                  " 5       U l        [        R                  " U5      U l        [        R                  " U5      U l        g )Nr,   rI   )r=   r>   r   r   r   intermediater   r6   r9   r   Identityr7   rn   layernorm_beforelayernorm_after)r?   r   rU   rg   r   r   r   r   r   r   r   r   r   drop_path_rater   r@   s                  r&   r>   CvtLayer.__init__j  s    " 	%!
 ,IA	i@BPSVBV~>\^\g\g\i "Y 7!||I6r%   c                     U R                  U R                  U5      UU5      nUnU R                  U5      nXQ-   nU R                  U5      nU R	                  U5      nU R                  Xa5      nU R                  U5      nU$ r<   )r   r   r7   r  r   r6   )r?   ra   rw   rx   self_attention_outputr   layer_outputs          r&   rC   CvtLayer.forward  s     $!!,/!

 1>>*:; (6 ++L9((6 {{<>~~l3r%   )r   r7   r   r  r   r6   r   rc   rN   s   @r&   r   r   e  s    & %7N r%   r   c                   .   ^  \ rS rSrU 4S jrS rSrU =r$ )CvtStagei  c                   > [         TU ]  5         Xl        X l        U R                  R                  U R                     (       aH  [
        R                  " [        R                  " SSU R                  R                  S   5      5      U l        [        UR                  U R                     UR                  U R                     U R                  S:X  a  UR                  OUR                  U R                  S-
     UR                  U R                     UR                  U R                     UR                  U R                     S9U l        [        R"                  " SUR$                  U R                     UR&                  U   SS9 Vs/ s H  o3R)                  5       PM     nn[
        R*                  " [-        UR&                  U R                     5       Vs/ s GHQ  n[/        UR0                  U R                     UR                  U R                     UR2                  U R                     UR4                  U R                     UR6                  U R                     UR8                  U R                     UR:                  U R                     UR<                  U R                     UR>                  U R                     UR@                  U R                     UR                  U R                     X@R                     URB                  U R                     UR                  U R                     S9PGMT     sn6 U l"        g s  snf s  snf )Nr   r   r   )rS   rV   rT   rU   rW   r\   cpu)r.   )r   rU   rg   r   r   r   r   r   r   r   r   r  r   r   )#r=   r>   configstager   r   	Parameterr!   randnrU   rP   patch_sizespatch_striderT   patch_paddingr   	embeddinglinspacer  depthitem
Sequentialranger   r   
kernel_qkvr   r   r   r   r   r   r   r   layers)r?   r  r  xdrop_path_ratesr   r@   s         r&   r>   CvtStage.__init__  s   
;;  ,\\%++aDKK<Q<QRT<U*VWDN&))$**5&&tzz204

a,,VEUEUVZV`V`cdVdEe&&tzz2((4))$**5
 $nnQ0E0Edjj0QSYS_S_`eSfotu
uFFHu 	 
 mm$ v||DJJ78#" 9A! $..tzz:$..tzz: & 1 1$** =$..tzz:%00<$..tzz:#__TZZ8*0*F*Ftzz*R#__TZZ8(.(B(B4::(N$..tzz:#2::#>$..tzz:#)#3#3DJJ#?  9#
	

s   L7EL<c                 Z   S nU R                  U5      nUR                  u  p4pVUR                  X4XV-  5      R                  SSS5      nU R                  R
                  U R                     (       a3  U R
                  R                  USS5      n[        R                  " X!4SS9nU R                   H  nU" XU5      nUnM     U R                  R
                  U R                     (       a  [        R                  " USXV-  /S5      u  p!UR                  SSS5      R                  X4XV5      nX4$ )Nr   rs   r   r   r   )r  r/   rt   ru   r  r   r  expandr!   r   r  r   )	r?   ra   r   rv   rT   rw   rx   layerlayer_outputss	            r&   rC   CvtStage.forward  s   	~~l32>2D2D/
&#((6>RZZ[\^_abc;;  ,--j"bAI 99i%>AFL[[E!,>M(L ! ;;  ,&+kk,FN@SUV&W#I#++Aq!499*TZb&&r%   )r   r  r  r  r  r   rN   s   @r&   r	  r	    s    (
T' 'r%   r	  c                   2   ^  \ rS rSrU 4S jrSS jrSrU =r$ )
CvtEncoderi  c                    > [         TU ]  5         Xl        [        R                  " / 5      U l        [        [        UR                  5      5       H'  nU R
                  R                  [        X5      5        M)     g r<   )r=   r>   r  r   
ModuleListstagesr  r   r  appendr	  )r?   r  	stage_idxr@   s      r&   r>   CvtEncoder.__init__  sR    mmB's6<<01IKKx:; 2r%   c                     U(       a  SOS nUnS n[        U R                  5       H  u  pxU" U5      u  pVU(       d  M  XE4-   nM     U(       d  [        S XVU4 5       5      $ [        UUUS9$ )Nr   c              3   .   #    U  H  oc  M  Uv   M     g 7fr<   r   ).0vs     r&   	<genexpr>%CvtEncoder.forward.<locals>.<genexpr>  s     b$Pq$Ps   	r   r   r   )	enumerater'  tupler   )	r?   r`   output_hidden_statesreturn_dictall_hidden_statesra   r   r   stage_modules	            r&   rC   CvtEncoder.forward  s|    "6BD#	!*4;;!7A&2<&@#L##$5$G! "8
 b\>O$Pbbb**%+
 	
r%   )r  r'  )FTr   rN   s   @r&   r$  r$    s    <
 
r%   r$  c                   ,    \ rS rSr\rSrSrS/rS r	Sr
g)CvtPreTrainedModeli  cvtr`   r   c                 p   [        U[        R                  [        R                  45      (       a  [        R                  R                  UR                  R                  SU R                  R                  S9UR                  l        UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR                  R                  R                  S5        g[        U[        5      (       a  U R                  R                  UR                      (       aW  [        R                  R                  UR                  R                  SU R                  R                  S9UR                  l        ggg)zInitialize the weightsr,   )meanstdNg      ?)rh   r   r   rl   inittrunc_normal_weightdatar  initializer_ranger   zero_rn   fill_r	  r   r  )r?   modules     r&   _init_weights CvtPreTrainedModel._init_weights	  s&   fryy"))455!#!6!6v}}7I7IPSY]YdYdYvYv!6!wFMM{{&  &&( '--KK""$MM$$S))){{$$V\\2(*(=(=$$))9V9V )> )  % 3 *r%   r   N)r   r   r   r   r   config_classbase_model_prefixmain_input_name_no_split_modulesrG  r$   r   r%   r&   r:  r:    s    L$O#r%   r:  c                      ^  \ rS rSrS
U 4S jjrS r\   SS\\R                     S\\
   S\\
   S\\\4   4S jj5       rS	rU =r$ )CvtModeli  c                 p   > [         TU ]  U5        Xl        [        U5      U l        U R                  5         g)z^
add_pooling_layer (bool, *optional*, defaults to `True`):
    Whether to add a pooling layer
N)r=   r>   r  r$  encoder	post_init)r?   r  add_pooling_layerr@   s      r&   r>   CvtModel.__init__  s-    
 	 !&)r%   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsrP  r   r   r   )r?   heads_to_pruner   r   s       r&   _prune_headsCvtModel._prune_heads%  s<    
 +002LELLu%//;;EB 3r%   r`   r4  r5  r*   c                    Ub  UOU R                   R                  nUb  UOU R                   R                  nUc  [        S5      eU R	                  UUUS9nUS   nU(       d	  U4USS  -   $ [        UUR                  UR                  S9$ )Nz You have to specify pixel_valuesr4  r5  r   r   r1  )r  r4  use_return_dict
ValueErrorrP  r   r   r   )r?   r`   r4  r5  encoder_outputssequence_outputs         r&   rC   CvtModel.forward-  s     %9$D $++JjJj 	 &1%<k$++B]B]?@@,,!5# ' 

 *!,#%(;;;*-+;;)77
 	
r%   )r  rP  r   )NNN)r   r   r   r   r>   rW  r   r   r!   rK   boolr   r   r   rC   r$   rM   rN   s   @r&   rN  rN    so    C  04/3&*	
u||,
 'tn
 d^	

 
u11	2
 
r%   rN  z
    Cvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    )custom_introc                      ^  \ rS rSrU 4S jr\    S
S\\R                     S\\R                     S\\	   S\\	   S\
\\4   4
S jj5       rS	rU =r$ )CvtForImageClassificationiM  c                   > [         TU ]  U5        UR                  U l        [        USS9U l        [
        R                  " UR                  S   5      U l        UR                  S:  a.  [
        R                  " UR                  S   UR                  5      O[
        R                  " 5       U l        U R                  5         g )NF)rR  r   r   )r=   r>   
num_labelsrN  r;  r   rn   rU   	layernormr   r   
classifierrQ  )r?   r  r@   s     r&   r>   "CvtForImageClassification.__init__T  s      ++Fe<f&6&6r&:; CIBSBSVWBWBIIf&&r*F,=,=>]_]h]h]j 	
 	r%   r`   labelsr4  r5  r*   c                 n   Ub  UOU R                   R                  nU R                  UUUS9nUS   nUS   nU R                   R                  S   (       a  U R	                  U5      nOEUR
                  u  ppUR                  XX-  5      R                  SSS5      nU R	                  U5      nUR                  SS9nU R                  U5      nSnUGb  U R                   R                  c  U R                   R                  S:X  a  SU R                   l
        OyU R                   R                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  S	U R                   l
        OS
U R                   l
        U R                   R                  S:X  aS  [!        5       nU R                   R                  S:X  a&  U" UR#                  5       UR#                  5       5      nOU" X5      nOU R                   R                  S	:X  aG  [%        5       nU" UR                  SU R                   R                  5      UR                  S5      5      nO,U R                   R                  S
:X  a  ['        5       nU" X5      nU(       d  U4USS -   nUb  U4U-   $ U$ [)        XUR*                  S9$ )ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NrZ  r   r   r   rs   r   
regressionsingle_label_classificationmulti_label_classification)losslogitsr   )r  r[  r;  r   rf  r/   rt   ru   r=  rg  problem_typere  r-   r!   longr   r
   squeezer	   r   r   r   )r?   r`   ri  r4  r5  outputsr^  r   rv   rT   rw   rx   sequence_output_meanro  rn  loss_fctr6   s                    r&   rC   !CvtForImageClassification.forwardb  sQ    &1%<k$++B]B]((!5#  
 "!*AJ	;;  $"nnY7O6E6K6K3Jf-22:V^\ddefhiklmO"nn_=O.333:!56{{''/;;))Q./;DKK,[[++a/V\\UZZ5OSYS_S_chclclSl/LDKK,/KDKK,{{''<7"9;;))Q.#FNN$4fnn6FGD#F3D))-JJ+-B0F0F GUWY))-II,./Y,F)-)9TGf$EvE3\c\q\qrrr%   )rg  r;  rf  re  )NNNN)r   r   r   r   r>   r   r   r!   rK   r`  r   r   r   rC   r$   rM   rN   s   @r&   rc  rc  M  s      04)-/3&*<su||,<s &<s 'tn	<s
 d^<s 
u::	;<s <sr%   rc  )rc  rN  r:  )r,   F)7r    collections.abcri   dataclassesr   typingr   r   r   r!   torch.utils.checkpointr   torch.nnr   r	   r
   modeling_outputsr   r   modeling_utilsr   r   r   utilsr   r   configuration_cvtr   
get_loggerr   loggerr   rK   rJ   r`  r7   Moduler9   rP   rX   r|   r   r   r   r   r   r   r   r   r	  r$  r:  rN  rc  __all__r   r%   r&   <module>r     s     ! ) )    A A Q c c , ( 
		H	% B+ B B*U\\ e T V[VbVb *-")) -BII $		 2RYY (ryy 
 
Nryy NbBII "6 299 6 r	bii 	
		 
?ryy ?D<'ryy <'~
 
8   , 0
! 0
 0
f Ls 2 LsLs^ Jr%   