
    fThe              	          S r SSKrSSKrSSKJrJrJrJr  SSKrSSK	J
s  Jr  SSKrSSKJ
r
  SSKJrJrJr  SSKJr  SSKJrJr  SS	KJr  SS
KJrJr  SSKJrJr  SSKJ r   \RB                  " \"5      r#S,S\RH                  S\%S\&S\RH                  4S jjr' " S S\
RP                  5      r) " S S\
RP                  5      r* " S S\
RP                  5      r+ " S S\
RP                  5      r, " S S\
RP                  5      r- " S S\
RP                  5      r. " S S \
RP                  5      r/ " S! S"\
RP                  5      r0\ " S# S$\5      5       r1\ " S% S&\15      5       r2\" S'S(9 " S) S*\15      5       r3/ S+Qr4g)-zPyTorch PVT model.    N)IterableOptionalTupleUnion)nn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputImageClassifierOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging   )	PvtConfiginput	drop_probtrainingreturnc                    US:X  d  U(       d  U $ SU-
  nU R                   S   4SU R                  S-
  -  -   nU[        R                  " X@R                  U R
                  S9-   nUR                  5         U R                  U5      U-  nU$ )a*  
Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
argument.
        r   r   )r   )dtypedevice)shapendimtorchrandr   r   floor_div)r   r   r   	keep_probr   random_tensoroutputs          \/var/www/auris/envauris/lib/python3.13/site-packages/transformers/models/pvt/modeling_pvt.py	drop_pathr(   )   s     CxII[[^

Q 77E

5ELL YYMYYy!M1FM    c                      ^  \ rS rSrSrSS\\   SS4U 4S jjjrS\R                  S\R                  4S jr
S\4S	 jrS
rU =r$ )PvtDropPath>   zXDrop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).Nr   r   c                 .   > [         TU ]  5         Xl        g N)super__init__r   )selfr   	__class__s     r'   r0   PvtDropPath.__init__A   s    "r)   hidden_statesc                 B    [        XR                  U R                  5      $ r.   )r(   r   r   r1   r4   s     r'   forwardPvtDropPath.forwardE   s    FFr)   c                 8    SR                  U R                  5      $ )Nzp={})formatr   )r1   s    r'   
extra_reprPvtDropPath.extra_reprH   s    }}T^^,,r)   )r   r.   )__name__
__module____qualname____firstlineno____doc__r   floatr0   r    Tensorr7   strr;   __static_attributes____classcell__r2   s   @r'   r+   r+   >   sQ    b#(5/ #T # #GU\\ Gell G-C - -r)   r+   c                      ^  \ rS rSrSr SS\S\\\\   4   S\\\\   4   S\S\S\S	\	4U 4S
 jjjr
S\R                  S\S\S\R                  4S jrS\R                  S\\R                  \\4   4S jrSrU =r$ )PvtPatchEmbeddingsL   z
This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial
`hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a
Transformer.
config
image_size
patch_sizestridenum_channelshidden_size	cls_tokenc                   > [         T	U ]  5         Xl        [        U[        R
                  R                  5      (       a  UOX"4n[        U[        R
                  R                  5      (       a  UOX34nUS   US   -  US   US   -  -  nX l        X0l        XPl	        Xl
        [        R                  " [        R                  " SU(       a  US-   OUU5      5      U l        U(       a,  [        R                  " [        R                   " SSU5      5      OS U l        [        R$                  " XVXCS9U l        [        R(                  " XaR*                  S9U l        [        R.                  " UR0                  S9U l        g )Nr   r   kernel_sizerN   eps)p)r/   r0   rK   
isinstancecollectionsabcr   rL   rM   rO   num_patchesr   	Parameterr    randnposition_embeddingszerosrQ   Conv2d
projection	LayerNormlayer_norm_eps
layer_normDropouthidden_dropout_probdropout)
r1   rK   rL   rM   rN   rO   rP   rQ   r[   r2   s
            r'   r0   PvtPatchEmbeddings.__init__S   s    	#-j+//:R:R#S#SZZdYq
#-j+//:R:R#S#SZZdYq
!!}
15*Q-:VW=:XY$$(&#%<<KKi;?[+V$
  JSekk!Q&DEX\))L6e,,{8M8MNzzF$>$>?r)   
embeddingsheightwidthr   c                    X#-  n[         R                  R                  5       (       d<  X@R                  R                  U R                  R                  -  :X  a  U R
                  $ UR                  SX#S5      R                  SSSS5      n[        R                  " XU4SS9nUR                  SSX#-  5      R                  SSS5      nU$ )Nr   r   r      bilinear)sizemode)
r    jit
is_tracingrK   rL   r^   reshapepermuteFinterpolate)r1   ri   rj   rk   r[   interpolated_embeddingss         r'   interpolate_pos_encoding+PvtPatchEmbeddings.interpolate_pos_encodingo   s    n yy##%%+9O9ORVR]R]RhRh9h*h+++''6"=EEaAqQ
"#--
%Wa"b"9"A"A!R"X"`"`abdegh"i&&r)   pixel_valuesc                 ~   UR                   u  p#pEX0R                  :w  a  [        S5      eU R                  U5      nUR                   Gt ptnUR	                  S5      R                  SS5      nU R                  U5      nU R                  b  U R                  R                  USS5      n	[        R                  " X4SS9nU R                  U R                  S S 2SS 24   XE5      n
[        R                  " U R                  S S 2S S24   U
4SS9n
OU R                  U R                  XE5      n
U R                  X-   5      nXU4$ )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rn   r   rm   dim)r   rO   
ValueErrorra   flatten	transposerd   rQ   expandr    catry   r^   rg   )r1   r{   
batch_sizerO   rj   rk   patch_embed_ri   rQ   r^   s              r'   r7   PvtPatchEmbeddings.forwardz   s8   2>2D2D/
&,,,w  ool3'--E!))!,66q!<__[1
>>%--j"bAII#:BJ"&"?"?@X@XYZ\]\^Y^@_ag"o"'))T-E-Ea!e-LNa,bhi"j"&"?"?@X@XZ`"h\\*"BC
5((r)   )
rQ   rK   rg   rL   rd   rO   r[   rM   r^   ra   F)r=   r>   r?   r@   rA   r   r   intr   boolr0   r    rC   ry   r   r7   rE   rF   rG   s   @r'   rI   rI   L   s      @@ #x},-@ #x},-	@
 @ @ @ @ @8	'5<< 	' 	'UX 	']b]i]i 	')ELL )U5<<c;Q5R ) )r)   rI   c                   n   ^  \ rS rSrS\S\4U 4S jjrS\R                  S\R                  4S jr	Sr
U =r$ )	PvtSelfOutput   rK   rP   c                    > [         TU ]  5         [        R                  " X"5      U l        [        R
                  " UR                  5      U l        g r.   )r/   r0   r   Lineardensere   rf   rg   )r1   rK   rP   r2   s      r'   r0   PvtSelfOutput.__init__   s4    YY{8
zz&"<"<=r)   r4   r   c                 J    U R                  U5      nU R                  U5      nU$ r.   r   rg   r6   s     r'   r7   PvtSelfOutput.forward   s$    

=1]3r)   r   )r=   r>   r?   r@   r   r   r0   r    rC   r7   rE   rF   rG   s   @r'   r   r      s6    >y >s >
U\\ ell  r)   r   c                      ^  \ rS rSrSrS\S\S\S\4U 4S jjrS\S	\	R                  4S
 jr SS\	R                  S\S\S\S	\\	R                     4
S jjrSrU =r$ )PvtEfficientSelfAttention   zpEfficient self-attention mechanism with reduction of the sequence [PvT paper](https://arxiv.org/abs/2102.12122).rK   rP   num_attention_headssequences_reduction_ratioc                 ~  > [         TU ]  5         X l        X0l        U R                  U R                  -  S:w  a&  [	        SU R                   SU R                   S35      e[        U R                  U R                  -  5      U l        U R                  U R                  -  U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " U R                  U R                  UR                  S9U l        [        R                  " UR                  5      U l        X@l        US:  a>  [        R$                  " X"XDS9U l        [        R(                  " X!R*                  S9U l        g g )	Nr   zThe hidden size (z6) is not a multiple of the number of attention heads ())biasr   rS   rU   )r/   r0   rP   r   r   r   attention_head_sizeall_head_sizer   r   qkv_biasquerykeyvaluere   attention_probs_dropout_probrg   r   r`   sequence_reductionrb   rc   rd   r1   rK   rP   r   r   r2   s        r'   r0   "PvtEfficientSelfAttention.__init__   se    	&#6 d666!;#D$4$4#5 622316 
 $'t'7'7$:R:R'R#S !558P8PPYYt//1C1C&//Z
99T--t/A/AXYYt//1C1C&//Z
zz&"E"EF)B&$q(&(ii6O'D# !ll;<Q<QRDO	 )r)   r4   r   c                     UR                  5       S S U R                  U R                  4-   nUR                  U5      nUR	                  SSSS5      $ )Nrm   r   rn   r   r   )rp   r   r   viewru   )r1   r4   	new_shapes      r'   transpose_for_scores.PvtEfficientSelfAttention.transpose_for_scores   sT    !&&("-1I1I4KcKc0dd	%**95$$Q1a00r)   rj   rk   output_attentionsc                    U R                  U R                  U5      5      nU R                  S:  aw  UR                  u  pgnUR	                  SSS5      R                  XhX#5      nU R                  U5      nUR                  XhS5      R	                  SSS5      nU R                  U5      nU R                  U R                  U5      5      n	U R                  U R                  U5      5      n
[        R                  " XYR                  SS5      5      nU[        R                  " U R                  5      -  n[         R"                  R%                  USS9nU R'                  U5      n[        R                  " X5      nUR	                  SSSS5      R)                  5       nUR+                  5       S S U R,                  4-   nUR/                  U5      nU(       a  X4nU$ U4nU$ )Nr   r   rn   rm   r}   r   )r   r   r   r   ru   rt   r   rd   r   r   r    matmulr   mathsqrtr   r   
functionalsoftmaxrg   
contiguousrp   r   r   )r1   r4   rj   rk   r   query_layerr   seq_lenrO   	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shapeoutputss                   r'   r7   !PvtEfficientSelfAttention.forward   s    //

=0IJ))A-0=0C0C-J)11!Q:BB:]ckM 33MBM)11*BOWWXY[\^_`M OOM:M--dhh}.EF	//

=0IJ !<<5H5HR5PQ+dii8P8P.QQ --//0@b/I ,,7_B%--aAq9DDF"/"4"4"6s";t?Q?Q>S"S%**+BC6G=2 O\M]r)   )r   r   rg   rP   r   rd   r   r   r   r   r   r   )r=   r>   r?   r@   rA   r   r   rB   r0   r    rC   r   r   r   r7   rE   rF   rG   s   @r'   r   r      s    zSS.1SHKShmS:1# 1%,, 1 #(*||* * 	*
  * 
u||	* *r)   r   c                      ^  \ rS rSrS\S\S\S\4U 4S jjrS r SS\	R                  S	\S
\S\S\\	R                     4
S jjrSrU =r$ )PvtAttention   rK   rP   r   r   c                 ~   > [         TU ]  5         [        UUUUS9U l        [	        XS9U l        [        5       U l        g )N)rP   r   r   )rP   )r/   r0   r   r1   r   r&   setpruned_headsr   s        r'   r0   PvtAttention.__init__   s@     	-# 3&?	
	 $FDEr)   c                 6   [        U5      S:X  a  g [        XR                  R                  U R                  R                  U R
                  5      u  p[        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l        [        U R                  R                  U5      U R                  l	        [        U R                  R                  USS9U R                  l        U R                  R                  [        U5      -
  U R                  l        U R                  R                  U R                  R                  -  U R                  l        U R
                  R                  U5      U l        g )Nr   r   r}   )lenr   r1   r   r   r   r   r   r   r   r&   r   r   union)r1   headsindexs      r'   prune_headsPvtAttention.prune_heads   s   u:?79900$))2O2OQUQbQb

 -TYY__eD		*499==%@		,TYY__eD		.t{{/@/@%QO )-		(E(EE
(R		%"&))"?"?$))B_B_"_		 --33E:r)   r4   rj   rk   r   r   c                 d    U R                  XX45      nU R                  US   5      nU4USS  -   nU$ )Nr   r   )r1   r&   )r1   r4   rj   rk   r   self_outputsattention_outputr   s           r'   r7   PvtAttention.forward  s@     yyQ;;|A7#%QR(88r)   )r&   r   r1   r   )r=   r>   r?   r@   r   r   rB   r0   r   r    rC   r   r   r7   rE   rF   rG   s   @r'   r   r      ss    "".1"HK"hm";& _d"\\36?BW[	u||	 r)   r   c            
          ^  \ rS rSr  SS\S\S\\   S\\   4U 4S jjjrS\R                  S\R                  4S	 jr
S
rU =r$ )PvtFFNi  rK   in_featureshidden_featuresout_featuresc                 x  > [         TU ]  5         Ub  UOUn[        R                  " X#5      U l        [        UR                  [        5      (       a  [        UR                     U l	        OUR                  U l	        [        R                  " X45      U l
        [        R                  " UR                  5      U l        g r.   )r/   r0   r   r   dense1rX   
hidden_actrD   r   intermediate_act_fndense2re   rf   rg   )r1   rK   r   r   r   r2   s        r'   r0   PvtFFN.__init__  s     	'3'?|[ii=f''--'-f.?.?'@D$'-'8'8D$ii>zz&"<"<=r)   r4   r   c                     U R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU R                  U5      nU$ r.   )r   r   rg   r   r6   s     r'   r7   PvtFFN.forward*  sP    M200?]3M2]3r)   )r   r   rg   r   )NN)r=   r>   r?   r@   r   r   r   r0   r    rC   r7   rE   rF   rG   s   @r'   r   r     sc    
 *.&*>> > "#	>
 sm> >"U\\ ell  r)   r   c                   v   ^  \ rS rSrS\S\S\S\S\S\4U 4S jjrSS	\R                  S
\S\S\
4S jjrSrU =r$ )PvtLayeri3  rK   rP   r   r(   r   	mlp_ratioc                 ^  > [         TU ]  5         [        R                  " X!R                  S9U l        [        UUUUS9U l        US:  a  [        U5      O[        R                  " 5       U l
        [        R                  " X!R                  S9U l        [        X&-  5      n[        XUS9U l        g )NrU   )rK   rP   r   r   r   )rK   r   r   )r/   r0   r   rb   rc   layer_norm_1r   	attentionr+   Identityr(   layer_norm_2r   r   mlp)	r1   rK   rP   r   r(   r   r   mlp_hidden_sizer2   s	           r'   r0   PvtLayer.__init__4  s     	LL:O:OP%# 3&?	
 4=s?Y/LL:O:OPk56Rabr)   r4   rj   rk   r   c                     U R                  U R                  U5      UUUS9nUS   nUSS  nU R                  U5      nXa-   nU R                  U R	                  U5      5      nU R                  U5      nX-   n	U	4U-   nU$ )N)r4   rj   rk   r   r   r   )r   r   r(   r   r   )
r1   r4   rj   rk   r   self_attention_outputsr   r   
mlp_outputlayer_outputs
             r'   r7   PvtLayer.forwardJ  s    !%++M:/	 "0 "
 2!4(,>>*:;(8XXd//>?
^^J/
$1/G+r)   )r   r(   r   r   r   r   )r=   r>   r?   r@   r   r   rB   r0   r    rC   r   r7   rE   rF   rG   s   @r'   r   r   3  st    cc c !	c
 c $)c c,U\\ 3 s _c  r)   r   c                      ^  \ rS rSrS\4U 4S jjr   SS\R                  S\\	   S\\	   S\\	   S\
\\4   4
S	 jjrS
rU =r$ )
PvtEncoderia  rK   c                   > [         T	U ]  5         Xl        [        R                  " SUR
                  [        UR                  5      SS9R                  5       n/ n[        UR                  5       H  nUR                  [        UUS:X  a  UR                  OU R                  R                  SUS-   -  -  UR                  U   UR                  U   US:X  a  UR                   OUR"                  US-
     UR"                  U   XAR                  S-
  :H  S95        M     [$        R&                  " U5      U l        / nSn[        UR                  5       H  n/ nUS:w  a  XaR                  US-
     -  n[        UR                  U   5       HY  nUR                  [+        UUR"                  U   UR,                  U   X&U-      UR.                  U   UR0                  U   S95        M[     UR                  [$        R&                  " U5      5        M     [$        R&                  " U5      U l        [$        R4                  " UR"                  S   UR6                  S	9U l        g )
Nr   cpu)r   rn   r   )rK   rL   rM   rN   rO   rP   rQ   )rK   rP   r   r(   r   r   rm   rU   )r/   r0   rK   r    linspacedrop_path_ratesumdepthstolistrangenum_encoder_blocksappendrI   rL   patch_sizesstridesrO   hidden_sizesr   
ModuleListpatch_embeddingsr   r   sequence_reduction_ratios
mlp_ratiosblockrb   rc   rd   )
r1   rK   drop_path_decaysri   iblockscurlayersjr2   s
            r'   r0   PvtEncoder.__init__b  s    !>>!V-B-BCDV_delln 
v001A"!45Fv00@V@V[\abefaf[g@h%11!4!>>!,89Q!4!4FDWDWXY\]X]D^ & 3 3A 6#<#<q#@@
 2 !#j 9 v001AFAv}}QU++6==+,%$*$7$7$:,2,F,Fq,I"27";282R2RST2U"("3"3A"6	 - MM"--/0! 2$ ]]6*
 ,,v':':2'>FDYDYZr)   r{   r   output_hidden_statesreturn_dictr   c                 d   U(       a  SOS nU(       a  SOS nUR                   S   n[        U R                  5      nUn	[        [	        U R
                  U R                  5      5       H  u  n
u  pU" U	5      u  pnU H/  nU" XX5      nUS   n	U(       a	  UUS   4-   nU(       d  M*  XY4-   nM1     XS-
  :w  d  MR  U	R                  X}US5      R                  SSSS5      R                  5       n	M     U R                  U	5      n	U(       a  XY4-   nU(       d  [        S XU4 5       5      $ [        U	UUS9$ )	N r   r   rm   r   rn   c              3   .   #    U  H  oc  M  Uv   M     g 7fr.   r  ).0vs     r'   	<genexpr>%PvtEncoder.forward.<locals>.<genexpr>  s     m$[q$[s   	last_hidden_stater4   
attentions)r   r   r  	enumeratezipr   rt   ru   r   rd   tupler   )r1   r{   r   r	  r
  all_hidden_statesall_self_attentionsr   
num_blocksr4   idxembedding_layerblock_layerrj   rk   r  layer_outputss                    r'   r7   PvtEncoder.forward  sK    #7BD$5b4!''*
_
$3<SAVAVX\XbXb=c3d/C//+:=+I(M5$ %mU V -a 0$*=qAQ@S*S'''(9<L(L% % 1n$ - 5 5j%QS T \ \]^`acdfg h s s u 4e 6 14D Dm]GZ$[mmm++*
 	
r)   )r  rK   rd   r   )FFT)r=   r>   r?   r@   r   r0   r    FloatTensorr   r   r   r   r   r7   rE   rF   rG   s   @r'   r   r   a  ss    0[y 0[j -2/4&*#
''#
 $D>#
 'tn	#

 d^#
 
uo%	&#
 #
r)   r   c                   ~    \ rS rSr\rSrSr/ rS\	\
R                  \
R                  \
R                  4   SS4S jrSrg)	PvtPreTrainedModeli  pvtr{   moduler   Nc                    [        U[        R                  5      (       a  [        R                  R	                  UR
                  R                  SU R                  R                  S9UR
                  l        UR                  b%  UR                  R                  R                  5         gg[        U[        R                  5      (       aJ  UR                  R                  R                  5         UR
                  R                  R                  S5        g[        U[        5      (       a  [        R                  R	                  UR                  R                  SU R                  R                  S9UR                  l        UR                  bW  [        R                  R	                  UR                  R                  SU R                  R                  S9UR                  l        ggg)zInitialize the weightsr   )meanstdNg      ?)rX   r   r   inittrunc_normal_weightdatarK   initializer_ranger   zero_rb   fill_rI   r^   rQ   )r1   r$  s     r'   _init_weights PvtPreTrainedModel._init_weights  s\   fbii(( "$!6!6v}}7I7IPSY]YdYdYvYv!6!wFMM{{&  &&( '--KK""$MM$$S) 233.0gg.C.C**//KK11 /D /F&&+
 +(*(=(=$$))55 )> )  % , 4r)   r  )r=   r>   r?   r@   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   r   r   r`   rb   r/  rE   r  r)   r'   r"  r"    sD    L$OE"))RYY*L$M RV r)   r"  c                      ^  \ rS rSrS\4U 4S jjrS r\   SS\R                  S\
\   S\
\   S\
\   S	\\\4   4
S
 jj5       rSrU =r$ )PvtModeli  rK   c                 p   > [         TU ]  U5        Xl        [        U5      U l        U R                  5         g r.   )r/   r0   rK   r   encoder	post_initr1   rK   r2   s     r'   r0   PvtModel.__init__  s/      "&) 	r)   c                     UR                  5        H7  u  p#U R                  R                  U   R                  R	                  U5        M9     g)z
Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base
class PreTrainedModel
N)itemsr8  layerr   r   )r1   heads_to_pruner>  r   s       r'   _prune_headsPvtModel._prune_heads  s<    
 +002LELLu%//;;EB 3r)   r{   r   r	  r
  r   c                 0   Ub  UOU R                   R                  nUb  UOU R                   R                  nUb  UOU R                   R                  nU R	                  UUUUS9nUS   nU(       d	  U4USS  -   $ [        UUR                  UR                  S9$ )Nr{   r   r	  r
  r   r   r  )rK   r   r	  use_return_dictr8  r   r4   r  )r1   r{   r   r	  r
  encoder_outputssequence_outputs          r'   r7   PvtModel.forward  s     2C1N-TXT_T_TqTq$8$D $++JjJj 	 &1%<k$++B]B],,%/!5#	 ' 
 *!,#%(;;;-)77&11
 	
r)   )rK   r8  )NNN)r=   r>   r?   r@   r   r0   r@  r   r    r   r   r   r   r   r   r7   rE   rF   rG   s   @r'   r6  r6    s    y C  -1/3&*
''
 $D>
 'tn	

 d^
 
uo%	&
 
r)   r6  z
    Pvt Model transformer with an image classification head on top (a linear layer on top of the final hidden state of
    the [CLS] token) e.g. for ImageNet.
    )custom_introc                      ^  \ rS rSrS\SS4U 4S jjr\    SS\\R                     S\\R                     S\\
   S	\\
   S
\\
   S\\\4   4S jj5       rSrU =r$ )PvtForImageClassificationi  rK   r   Nc                 6  > [         TU ]  U5        UR                  U l        [        U5      U l        UR                  S:  a.  [
        R                  " UR                  S   UR                  5      O[
        R                  " 5       U l	        U R                  5         g )Nr   rm   )r/   r0   
num_labelsr6  r#  r   r   r   r   
classifierr9  r:  s     r'   r0   "PvtForImageClassification.__init__  sy      ++F# FLEVEVYZEZBIIf))"-v/@/@A`b`k`k`m 	
 	r)   r{   labelsr   r	  r
  c                 :   Ub  UOU R                   R                  nU R                  UUUUS9nUS   nU R                  USS2SSS24   5      nSn	UGb  U R                   R                  c  U R
                  S:X  a  SU R                   l        OoU R
                  S:  aN  UR                  [        R                  :X  d  UR                  [        R                  :X  a  SU R                   l        OSU R                   l        U R                   R                  S:X  aI  [        5       n
U R
                  S:X  a&  U
" UR                  5       UR                  5       5      n	OU
" X5      n	OU R                   R                  S:X  a=  [        5       n
U
" UR                  SU R
                  5      UR                  S5      5      n	O,U R                   R                  S:X  a  [        5       n
U
" X5      n	U(       d  U4USS -   nU	b  U	4U-   $ U$ [        U	UUR                   UR"                  S	9$ )
ab  
labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
    Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
    config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
    `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
NrC  r   r   
regressionsingle_label_classificationmulti_label_classificationrm   )losslogitsr4   r  )rK   rD  r#  rM  problem_typerL  r   r    longr   r
   squeezer	   r   r   r   r4   r  )r1   r{   rO  r   r	  r
  r   rF  rU  rT  loss_fctr&   s               r'   r7   !PvtForImageClassification.forward#  s    &1%<k$++B]B]((%/!5#	  
 "!*Aq!9:{{''/??a'/;DKK,__q(fllejj.HFLL\a\e\eLe/LDKK,/KDKK,{{''<7"9??a'#FNN$4fnn6FGD#F3D))-JJ+-B @&++b/R))-II,./Y,F)-)9TGf$EvE$!//))	
 	
r)   )rM  rL  r#  )NNNN)r=   r>   r?   r@   r   r0   r   r   r    rC   r   r   r  r   r7   rE   rF   rG   s   @r'   rJ  rJ    s    y T   *.,0/3&*;
u||,;
 &;
 $D>	;

 'tn;
 d^;
 
u++	,;
 ;
r)   rJ  )rJ  r6  r"  )r   F)5rA   rY   r   typingr   r   r   r   r    torch.nn.functionalr   r   rv   torch.utils.checkpointtorch.nnr   r	   r
   activationsr   modeling_outputsr   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   configuration_pvtr   
get_loggerr=   loggerrC   rB   r   r(   Moduler+   rI   r   r   r   r   r   r   r"  r6  rJ  __all__r  r)   r'   <module>ri     sp  "    3 3      A A ! F - Q , ( 
		H	%U\\ e T V[VbVb *-")) -A) A)H	BII 	O		 Od'299 'TRYY 6+ryy +\V
 V
r   > 0
! 0
 0
f K
 2 K
K
\ Jr)   