
"""PyTorch ConvNextV2 model."""

from typing import Optional, Tuple, Union

import torch
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss

from ...activations import ACT2FN
from ...modeling_outputs import (
    BackboneOutput,
    BaseModelOutputWithNoAttention,
    BaseModelOutputWithPoolingAndNoAttention,
    ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, logging
from ...utils.backbone_utils import BackboneMixin
from .configuration_convnextv2 import ConvNextV2Config


logger = logging.get_logger(__name__)


def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor:
    """
    Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).

    Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks,
    however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the
    layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the
    argument.
    """
    if drop_prob == 0.0 or not training:
        return input
    keep_prob = 1 - drop_prob
    # one random value per sample, broadcastable across all remaining dims
    shape = (input.shape[0],) + (1,) * (input.ndim - 1)
    random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device)
    random_tensor.floor_()  # binarize
    output = input.div(keep_prob) * random_tensor
    return output


class ConvNextV2DropPath(nn.Module):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks)."""

    def __init__(self, drop_prob: Optional[float] = None) -> None:
        super().__init__()
        self.drop_prob = drop_prob

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return drop_path(hidden_states, self.drop_prob, self.training)

    def extra_repr(self) -> str:
        return "p={}".format(self.drop_prob)
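

# Editor's note (illustrative sketch, not part of the original module): with
# drop_prob=0.5 in training mode, `drop_path` zeroes each sample in the batch
# independently with probability 0.5 and rescales the survivors by
# 1 / keep_prob, so the expected activation is unchanged:
#
#     x = torch.ones(4, 3, 8, 8)
#     out = drop_path(x, drop_prob=0.5, training=True)
#     # each of the 4 samples of `out` is either all 0.0 or all 2.0
#
# With training=False (or drop_prob=0.0) the function is the identity.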
U =r$ )	ConvNextV2GRNL   z)GRN (Global Response Normalization) layerdimc                    > [         TU ]  5         [        R                  " [        R
                  " SSSU5      5      U l        [        R                  " [        R
                  " SSSU5      5      U l        g )Nr   )r/   r0   r   	Parameterr    zerosweightbias)r1   rK   r2   s     r'   r0   ConvNextV2GRN.__init__O   sL    ll5;;q!Q#<=LLQ1c!:;	r)   r4   r   c                     [         R                  R                  USSSS9nX"R                  SSS9S-   -  nU R                  X-  -  U R
                  -   U-   nU$ )N   )r   rS   T)ordrK   keepdim)rK   rU   ư>)r    linalgvector_normmeanrO   rP   )r1   r4   global_featuresnorm_featuress       r'   r7   ConvNextV2GRN.forwardT   se    ,,22=aV]a2b'+?+?BPT+?+UX\+\]}'DE		QTaar)   )rP   rO   )r=   r>   r?   r@   rA   intr0   r    FloatTensorr7   rE   rF   rG   s   @r'   rI   rI   L   s6    3<C <
U%6%6 5;L;L  r)   rI   c                   j   ^  \ rS rSrSrSU 4S jjrS\R                  S\R                  4S jrSr	U =r
$ )	ConvNextV2LayerNorm^   a5  LayerNorm that supports two data formats: channels_last (default) or channels_first.


class ConvNextV2LayerNorm(nn.Module):
    r"""LayerNorm that supports two data formats: channels_last (default) or channels_first, referring to the
    ordering of the dimensions in the inputs. channels_last corresponds to inputs with shape (batch_size, height,
    width, channels) while channels_first corresponds to inputs with shape (batch_size, channels, height, width).
    """

    def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(normalized_shape))
        self.bias = nn.Parameter(torch.zeros(normalized_shape))
        self.eps = eps
        self.data_format = data_format
        if self.data_format not in ["channels_last", "channels_first"]:
            raise NotImplementedError(f"Unsupported data format: {self.data_format}")
        self.normalized_shape = (normalized_shape,)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        if self.data_format == "channels_last":
            x = torch.nn.functional.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
        elif self.data_format == "channels_first":
            input_dtype = x.dtype
            x = x.float()
            u = x.mean(1, keepdim=True)
            s = (x - u).pow(2).mean(1, keepdim=True)
            x = (x - u) / torch.sqrt(s + self.eps)
            x = x.to(dtype=input_dtype)
            x = self.weight[:, None, None] * x + self.bias[:, None, None]
        return x


class ConvNextV2Embeddings(nn.Module):
    """This class is comparable to (and inspired by) the SwinEmbeddings class
    found in src/transformers/models/swin/modeling_swin.py.
    """

    def __init__(self, config):
        super().__init__()
        self.patch_embeddings = nn.Conv2d(
            config.num_channels, config.hidden_sizes[0], kernel_size=config.patch_size, stride=config.patch_size
        )
        self.layernorm = ConvNextV2LayerNorm(config.hidden_sizes[0], eps=1e-6, data_format="channels_first")
        self.num_channels = config.num_channels

    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
        num_channels = pixel_values.shape[1]
        if num_channels != self.num_channels:
            raise ValueError(
                "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
            )
        embeddings = self.patch_embeddings(pixel_values)
        embeddings = self.layernorm(embeddings)
        return embeddings


class ConvNextV2Layer(nn.Module):
    """This corresponds to the `Block` class in the original implementation.

    There are two equivalent implementations: (1) [DwConv, LayerNorm (channels_first), Conv, GELU, 1x1 Conv], all in
    (N, C, H, W); (2) [DwConv, Permute to (N, H, W, C), LayerNorm (channels_last), Linear, GELU, Linear], Permute back.

    The authors used (2) as they find it slightly faster in PyTorch.

    Args:
        config ([`ConvNextV2Config`]): Model configuration class.
        dim (`int`): Number of input channels.
        drop_path (`float`): Stochastic depth rate. Default: 0.0.
    """

    def __init__(self, config, dim, drop_path=0.0):
        super().__init__()
        # depthwise conv
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim)
        self.layernorm = ConvNextV2LayerNorm(dim, eps=1e-6)
        # pointwise/1x1 convs, implemented with linear layers
        self.pwconv1 = nn.Linear(dim, 4 * dim)
        self.act = ACT2FN[config.hidden_act]
        self.grn = ConvNextV2GRN(4 * dim)
        self.pwconv2 = nn.Linear(4 * dim, dim)
        self.drop_path = ConvNextV2DropPath(drop_path) if drop_path > 0.0 else nn.Identity()

    def forward(self, hidden_states: torch.FloatTensor) -> torch.Tensor:
        input = hidden_states
        x = self.dwconv(hidden_states)
        # (batch_size, num_channels, height, width) -> (batch_size, height, width, num_channels)
        x = x.permute(0, 2, 3, 1)
        x = self.layernorm(x)
        x = self.pwconv1(x)
        x = self.act(x)
        x = self.grn(x)
        x = self.pwconv2(x)
        # (batch_size, height, width, num_channels) -> (batch_size, num_channels, height, width)
        x = x.permute(0, 3, 1, 2)

        x = input + self.drop_path(x)
        return x
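

# Editor's note (illustrative shape trace, not part of the original module),
# assuming dim=96 and a 56x56 feature map:
#
#     (N, 96, 56, 56) --dwconv 7x7--> (N, 96, 56, 56)
#                     --permute-----> (N, 56, 56, 96)
#                     --pwconv1-----> (N, 56, 56, 384)   # 4x expansion
#                     --act + grn---> (N, 56, 56, 384)
#                     --pwconv2-----> (N, 56, 56, 96)
#                     --permute-----> (N, 96, 56, 56)
#
# followed by the stochastic-depth residual connection. Spatial size and
# channel count are unchanged, so blocks can be stacked freely within a stage.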
U =r$ )	ConvNextV2Stage   a  ConvNeXTV2 stage, consisting of an optional downsampling layer + multiple residual blocks.

Args:
    config ([`ConvNextV2Config`]): Model configuration class.
    in_channels (`int`): Number of input channels.
    out_channels (`int`): Number of output channels.
    depth (`int`): Number of residual blocks.
    drop_path_rates(`List[float]`): Stochastic depth rates for each layer.
c                 ~  > [         T	U ]  5         X#:w  d  US:  a9  [        R                  " [	        USSS9[        R
                  " X#XES95      U l        O[        R                  " 5       U l        U=(       d    S/U-  n[        R                  " [        U5       Vs/ s H  n[        XXx   S9PM     sn6 U l
        g s  snf )Nr   rW   re   r~   r{   r   )rK   r(   )r/   r0   r   
Sequentialra   r   downsampling_layerr   ranger   layers)
r1   r   in_channelsout_channelsr|   r}   depthdrop_path_ratesjr2   s
            r'   r0   ConvNextV2Stage.__init__   s    &&1*&(mm#KTGWX		+\'D#
 ')kkmD#):cUU]mm_dej_kl_kZ[of/BTU_kl
ls   B:r4   r   c                 J    U R                  U5      nU R                  U5      nU$ r.   r   r   r6   s     r'   r7   ConvNextV2Stage.forward   s&    //>M2r)   r   )rS   rS   rS   Nr   rG   s   @r'   r   r      s/    
U%6%6 5<<  r)   r   c                   t   ^  \ rS rSrU 4S jr  S	S\R                  S\\   S\\   S\	\
\4   4S jjrSrU =r$ )


class ConvNextV2Encoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.stages = nn.ModuleList()
        # one stochastic depth rate per block, linearly increasing with depth,
        # split into per-stage sub-lists
        drop_path_rates = [
            x.tolist()
            for x in torch.linspace(0, config.drop_path_rate, sum(config.depths), device="cpu").split(config.depths)
        ]
        prev_chs = config.hidden_sizes[0]
        for i in range(config.num_stages):
            out_chs = config.hidden_sizes[i]
            stage = ConvNextV2Stage(
                config,
                in_channels=prev_chs,
                out_channels=out_chs,
                stride=2 if i > 0 else 1,
                depth=config.depths[i],
                drop_path_rates=drop_path_rates[i],
            )
            self.stages.append(stage)
            prev_chs = out_chs

    def forward(
        self,
        hidden_states: torch.FloatTensor,
        output_hidden_states: Optional[bool] = False,
        return_dict: Optional[bool] = True,
    ) -> Union[Tuple, BaseModelOutputWithNoAttention]:
        all_hidden_states = () if output_hidden_states else None

        for i, layer_module in enumerate(self.stages):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            hidden_states = layer_module(hidden_states)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(v for v in [hidden_states, all_hidden_states] if v is not None)

        return BaseModelOutputWithNoAttention(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
        )
g)ConvNextV2PreTrainedModeli  
convnextv2r   r   c                    [        U[        R                  [        R                  45      (       ak  UR                  R
                  R                  SU R                  R                  S9  UR                  b%  UR                  R
                  R                  5         gg[        U[        R                  [        45      (       aJ  UR                  R
                  R                  5         UR                  R
                  R                  S5        g[        U[        5      (       aI  UR                  R
                  R                  5         UR                  R
                  R                  5         gg)zInitialize the weightsr   )rZ   stdNg      ?)
isinstancer   r   r   rO   datanormal_r   initializer_rangerP   zero_	LayerNormra   fill_rI   )r1   modules     r'   _init_weights'ConvNextV2PreTrainedModel._init_weights  s    fryy"))455 MM&&CT[[5R5R&S{{&  &&( '/B CDDKK""$MM$$S)..MM$$&KK""$ /r)   r   N)r=   r>   r?   r@   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   rE   r   r)   r'   r   r     s     #L$$O*+%r)   r   c                      ^  \ rS rSrU 4S jr\   S	S\\R                     S\\	   S\\	   S\
\\4   4S jj5       rSrU =r$ )


@auto_docstring
class ConvNextV2Model(ConvNextV2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = ConvNextV2Embeddings(config)
        self.encoder = ConvNextV2Encoder(config)

        # final layernorm layer
        self.layernorm = nn.LayerNorm(config.hidden_sizes[-1], eps=config.layer_norm_eps)

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPoolingAndNoAttention]:
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        if pixel_values is None:
            raise ValueError("You have to specify pixel_values")

        embedding_output = self.embeddings(pixel_values)

        encoder_outputs = self.encoder(
            embedding_output,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        last_hidden_state = encoder_outputs[0]

        # global average pooling, (N, C, H, W) -> (N, C)
        pooled_output = self.layernorm(last_hidden_state.mean([-2, -1]))

        if not return_dict:
            return (last_hidden_state, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPoolingAndNoAttention(
            last_hidden_state=last_hidden_state,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    ConvNextV2 Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    """
)
class ConvNextV2ForImageClassification(ConvNextV2PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)

        self.num_labels = config.num_labels
        self.convnextv2 = ConvNextV2Model(config)

        # Classifier head
        self.classifier = (
            nn.Linear(config.hidden_sizes[-1], config.num_labels) if config.num_labels > 0 else nn.Identity()
        )

        # Initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, ImageClassifierOutputWithNoAttention]:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss); if
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.convnextv2(pixel_values, output_hidden_states=output_hidden_states, return_dict=return_dict)

        pooled_output = outputs.pooler_output if return_dict else outputs[1]

        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return ImageClassifierOutputWithNoAttention(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
        )


@auto_docstring(
    custom_intro="""
    ConvNeXT V2 backbone, to be used with frameworks like DETR and MaskFormer.
    """
)
class ConvNextV2Backbone(ConvNextV2PreTrainedModel, BackboneMixin):
    def __init__(self, config):
        super().__init__(config)
        super()._init_backbone(config)

        self.embeddings = ConvNextV2Embeddings(config)
        self.encoder = ConvNextV2Encoder(config)
        self.num_features = [config.hidden_sizes[0]] + config.hidden_sizes

        # Add layer norms to hidden states of out_features
        hidden_states_norms = {}
        for stage, num_channels in zip(self._out_features, self.channels):
            hidden_states_norms[stage] = ConvNextV2LayerNorm(num_channels, data_format="channels_first")
        self.hidden_states_norms = nn.ModuleDict(hidden_states_norms)

        # initialize weights and apply final processing
        self.post_init()

    @auto_docstring
    def forward(
        self,
        pixel_values: torch.Tensor,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> BackboneOutput:
        r"""
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("facebook/convnextv2-tiny-1k-224")
        >>> model = AutoBackbone.from_pretrained("facebook/convnextv2-tiny-1k-224")

        >>> inputs = processor(image, return_tensors="pt")
        >>> outputs = model(**inputs)
        ```"""
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        embedding_output = self.embeddings(pixel_values)

        outputs = self.encoder(
            embedding_output,
            output_hidden_states=True,
            return_dict=return_dict,
        )

        hidden_states = outputs.hidden_states if return_dict else outputs[1]

        feature_maps = ()
        for stage, hidden_state in zip(self.stage_names, hidden_states):
            if stage in self.out_features:
                hidden_state = self.hidden_states_norms[stage](hidden_state)
                feature_maps += (hidden_state,)

        if not return_dict:
            output = (feature_maps,)
            if output_hidden_states:
                output += (hidden_states,)
            return output

        return BackboneOutput(
            feature_maps=feature_maps,
            hidden_states=hidden_states if output_hidden_states else None,
            attentions=None,
        )


__all__ = ["ConvNextV2ForImageClassification", "ConvNextV2Model", "ConvNextV2PreTrainedModel", "ConvNextV2Backbone"]