
    hn}                        S SK r S SKJr  S SKJr  S SKJr  S SKJrJ	r	J
r
  S SKrS SKrS SKJs  Jr  S SKJrJr  S SKJrJrJr  S SKJr  S S	KJrJr  S S
KJrJr  S SKJ r   S SK!J"r"J#r#  S SK$J%r%  / SQr&S\'\(\(4   S\(S\(S\(S\'\(\(4   4
S jr)S\'\(\(4   S\(S\*\'\(\(4      4S jr+S\(S\(S\R$                  4S jr, " S S\RZ                  5      r. " S S\RZ                  5      r/ " S S\RZ                  5      r0 " S  S!\RZ                  5      r1 " S" S#\RZ                  5      r2 " S$ S%\RZ                  5      r3 " S& S'\RZ                  5      r4 " S( S)\RZ                  5      r5 " S* S+\RZ                  5      r6  S=S,\(S-\*\(   S.\*\(   S/\7S0\(S1\(S2\
\   S3\8S4\S\64S5 jjr9 " S6 S7\5      r:\" 5       \" S8\:Rv                  4S99SS:S;.S2\
\:   S3\8S4\S\64S< jj5       5       r<g)>    N)OrderedDict)Sequence)partial)AnyCallableOptional)nnTensor)register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)Conv2dNormActivationSqueezeExcitation)StochasticDepth)ImageClassificationInterpolationMode)_log_api_usage_once)MaxVitMaxVit_T_Weightsmaxvit_t
input_sizekernel_sizestridepaddingreturnc                 R    U S   U-
  SU-  -   U-  S-   U S   U-
  SU-  -   U-  S-   4$ )Nr          )r   r   r   r   s       Q/var/www/auris/envauris/lib/python3.13/site-packages/torchvision/models/maxvit.py_get_conv_output_shaper$      sJ    	A	$q7{	2v=A	A	$q7{	2v=A     n_blocksc                     / n[        U SSS5      n[        U5       H"  n[        USSS5      nUR                  U5        M$     U$ )zQUtil function to check that the input size is correct for a MaxVit configuration.   r    r!   )r$   rangeappend)r   r&   shapesblock_input_shape_s        r#   _make_block_input_shapesr.   !   sL    F.z1aC8_23DaAN'(  Mr%   heightwidthc                    [         R                  " [         R                  " [         R                  " U 5      [         R                  " U5      /SS95      n[         R                  " US5      nUS S 2S S 2S 4   US S 2S S S 24   -
  nUR                  SSS5      R                  5       nUS S 2S S 2S4==   U S-
  -  ss'   US S 2S S 2S4==   US-
  -  ss'   US S 2S S 2S4==   SU-  S-
  -  ss'   UR                  S5      $ )Nij)indexingr!   r    r   )torchstackmeshgridarangeflattenpermute
contiguoussum)r/   r0   coordscoords_flatrelative_coordss        r#   _get_relative_position_indexr@   +   s    [[f)=u||E?R(S^bcdF--*K!!Q*-AtQJ0GGO%--aA6AACOAq!G
*Aq!G	)Aq!GE	A-r""r%   c                      ^  \ rS rSrSr SS\S\S\S\S\S\S	\R                  4   S
\S	\R                  4   S\SS4U 4S jjjr
S\S\4S jrSrU =r$ )MBConv6   a  MBConv: Mobile Inverted Residual Bottleneck.

Args:
    in_channels (int): Number of input channels.
    out_channels (int): Number of output channels.
    expansion_ratio (float): Expansion ratio in the bottleneck.
    squeeze_ratio (float): Squeeze ratio in the SE Layer.
    stride (int): Stride of the depthwise convolution.
    activation_layer (Callable[..., nn.Module]): Activation function.
    norm_layer (Callable[..., nn.Module]): Normalization function.
    p_stochastic_dropout (float): Probability of stochastic depth.
in_channelsout_channelsexpansion_ratiosqueeze_ratior   activation_layer.
norm_layerp_stochastic_dropoutr   Nc	                   > [         TU ]  5         U   US:g  =(       d    X:g  n	U	(       aQ  [        R                  " XSSSS9/n
US:X  a  [        R                  " SUSS9/U
-   n
[        R
                  " U
6 U l        O[        R                  " 5       U l        [        X#-  5      n[        X$-  5      nU(       a  [        USS9U l
        O[        R                  " 5       U l
        [        5       nU" U5      US	'   [        UUSSS
UUS S9US'   [        UUSUSUUUS S9	US'   [        X[        R                  S9US'   [        R                  " XSSS9US'   [        R
                  " U5      U l        g )Nr!   T)r   r   biasr    r(   r   r   r   rowmodepre_normr   )r   r   r   rH   rI   inplaceconv_a)r   r   r   rH   rI   groupsrR   conv_b)
activationsqueeze_excitation)rD   rE   r   rL   conv_c)super__init__r	   Conv2d	AvgPool2d
SequentialprojIdentityintr   stochastic_depthr   r   r   SiLUlayers)selfrD   rE   rF   rG   r   rH   rI   rJ   should_projr^   mid_channelssqz_channels_layers	__class__s                 r#   rZ   MBConv.__init__D   sh    	 	k@[%@IIkQqW[\]D{61MNQUUt,DIDI<9:<78$34Hu$UD!$&KKMD!-(5
0-!	
 1-!

 ):,acahah(i$%II,ghostmmG,r%   xc                 l    U R                  U5      nU R                  U R                  U5      5      nX!-   $ )z
Args:
    x (Tensor): Input tensor with expected layout of [B, C, H, W].
Returns:
    Tensor: Output tensor with expected layout of [B, C, H / stride, W / stride].
)r^   ra   rc   rd   rk   ress      r#   forwardMBConv.forward   s0     iil!!$++a.1wr%   )rc   r^   ra   )        )__name__
__module____qualname____firstlineno____doc__r`   floatr   r	   ModulerZ   r
   ro   __static_attributes____classcell__ri   s   @r#   rB   rB   6   s    , '*;-;- ;- 	;-
 ;- ;- #3		>2;- S"))^,;- $;- 
;- ;-z	 	F 	 	r%   rB   c                   t   ^  \ rS rSrSrS\S\S\SS4U 4S jjrS\R                  4S	 jr	S
\S\4S jr
SrU =r$ )$RelativePositionalMultiHeadAttention   zRelative Positional Multi-Head Attention.

Args:
    feat_dim (int): Number of input features.
    head_dim (int): Number of features per head.
    max_seq_len (int): Maximum sequence length.
feat_dimhead_dimmax_seq_lenr   Nc                 V  > [         TU ]  5         X-  S:w  a  [        SU SU 35      eX-  U l        X l        [        [        R                  " U5      5      U l        X0l	        [        R                  " XR                  U R                  -  S-  5      U l        US-  U l        [        R                  " U R                  U R                  -  U5      U l        [        R                  R!                  ["        R$                  " SU R                  -  S-
  SU R                  -  S-
  -  U R                  4["        R&                  S95      U l        U R+                  S	[-        U R                  U R                  5      5        ["        R                  R.                  R1                  U R(                  S
S9  g )Nr   z
feat_dim: z  must be divisible by head_dim: r(   g      r    r!   )dtyperelative_position_index{Gz?std)rY   rZ   
ValueErrorn_headsr   r`   mathsqrtsizer   r	   Linearto_qkvscale_factormerge	parameter	Parameterr5   emptyfloat32relative_position_bias_tableregister_bufferr@   inittrunc_normal_)rd   r   r   r   ri   s       r#   rZ   -RelativePositionalMultiHeadAttention.__init__   sK    	!#z(3ST\S]^__+ 		+./	&ii,,*F*JK$dNYYt}}t||;XF
,.LL,B,BKK!dii-!+DII0ABDLLQY^YfYfg-
) 	68TUYU^U^`d`i`i8jk##D$E$E4#Pr%   c                    U R                   R                  S5      nU R                  U   R                  U R                  U R                  S5      nUR	                  SSS5      R                  5       nUR                  S5      $ )Nr4   r    r   r!   )r   viewr   r   r:   r;   	unsqueeze)rd   
bias_indexrelative_biass      r#   get_relative_positional_biasARelativePositionalMultiHeadAttention.get_relative_positional_bias   ss    1166r:
99*EJJ4K[K[]a]m]moqr%--aA6AAC&&q))r%   rk   c                    UR                   u  p#pEU R                  U R                  pvU R                  U5      n[        R
                  " USSS9u  pnU	R                  X#XFU5      R                  SSSSS5      n	U
R                  X#XFU5      R                  SSSSS5      n
UR                  X#XFU5      R                  SSSSS5      nXR                  -  n
[        R                  " SX5      nU R                  5       n[        R                  " X-   SS9n[        R                  " S	X5      nUR                  SSSSS5      R                  X#XE5      nU R                  U5      nU$ )
z
Args:
    x (Tensor): Input tensor with expected layout of [B, G, P, D].
Returns:
    Tensor: Output tensor with expected layout of [B, G, P, D].
r(   r4   )dimr   r!   r       z!B G H I D, B G H J D -> B G H I Jz!B G H I J, B G H J D -> B G H I D)shaper   r   r   r5   chunkreshaper:   r   einsumr   Fsoftmaxr   )rd   rk   BGPDHDHqkvqkvdot_prodpos_biasouts                  r#   ro   ,RelativePositionalMultiHeadAttention.forward   s8    WW
admm2kk!n++c1"-aIIaA"%--aAq!<IIaA"%--aAq!<IIaA"%--aAq!<!!!<< CQJ44699X0b9ll>Lkk!Q1a(00q<jjo
r%   )r   r   r   r   r   r   r   r   )rr   rs   rt   ru   rv   r`   rZ   r5   r
   r   ro   ry   rz   r{   s   @r#   r}   r}      s`    QQ Q 	Q
 
Q8*ell * F  r%   r}   c                   v   ^  \ rS rSrSrS\S\SS4U 4S jjrS\R                  S\R                  4S	 jr	S
r
U =r$ )SwapAxes   zPermute the axes of a tensor.abr   Nc                 :   > [         TU ]  5         Xl        X l        g N)rY   rZ   r   r   )rd   r   r   ri   s      r#   rZ   SwapAxes.__init__   s    r%   rk   c                 \    [         R                  " XR                  U R                  5      nU$ r   )r5   swapaxesr   r   rm   s      r#   ro   SwapAxes.forward   s    nnQ/
r%   )r   r   )rr   rs   rt   ru   rv   r`   rZ   r5   r
   ro   ry   rz   r{   s   @r#   r   r      s@    '# # $ 
 %,,  r%   r   c                   F   ^  \ rS rSrSrS	U 4S jjrS\S\S\4S jrSr	U =r
$ )
WindowPartition   z:
Partition the input tensor into non-overlapping windows.
r   c                 "   > [         TU ]  5         g r   rY   rZ   rd   ri   s    r#   rZ   WindowPartition.__init__       r%   rk   pc                     UR                   u  p4pVUnUR                  X4XW-  XvU-  U5      nUR                  SSSSSS5      nUR                  X5U-  Xg-  -  Xw-  U5      nU$ )z
Args:
    x (Tensor): Input tensor with expected layout of [B, C, H, W].
    p (int): Number of partitions.
Returns:
    Tensor: Output tensor with expected layout of [B, H/P, W/P, P*P, C].
r   r    r   r(      r!   r   r   r:   )rd   rk   r   r   Cr   Wr   s           r#   ro   WindowPartition.forward   sl     WW
aIIaAFAAvq1IIaAq!Q'IIaq&QV,aeQ7r%   r"   r   Nrr   rs   rt   ru   rv   rZ   r
   r`   ro   ry   rz   r{   s   @r#   r   r      s,     C F  r%   r   c            
       N   ^  \ rS rSrSrSU 4S jjrS\S\S\S\S\4
S	 jrS
r	U =r
$ )WindowDepartition   zg
Departition the input tensor of non-overlapping windows into a feature volume of layout [B, C, H, W].
r   c                 "   > [         TU ]  5         g r   r   r   s    r#   rZ   WindowDepartition.__init__  r   r%   rk   r   h_partitionsw_partitionsc                     UR                   u  pVpxUn	X4pUR                  XZXX5      nUR                  SSSSSS5      nUR                  XXX-  X-  5      nU$ )a2  
Args:
    x (Tensor): Input tensor with expected layout of [B, (H/P * W/P), P*P, C].
    p (int): Number of partitions.
    h_partitions (int): Number of vertical partitions.
    w_partitions (int): Number of horizontal partitions.
Returns:
    Tensor: Output tensor with expected layout of [B, C, H, W].
r   r   r!   r(   r    r   r   )rd   rk   r   r   r   r   r   PPr   r   HPWPs               r#   ro   WindowDepartition.forward  s`     ggbBIIaRA)IIaAq!Q'IIaBFBF+r%   r"   r   r   r{   s   @r#   r   r      s;     C s # RX  r%   r   c                      ^  \ rS rSrSrS\S\S\S\S\\\4   S\S	\S
\	R                  4   S\S
\	R                  4   S\S\S\SS4U 4S jjrS\S\4S jrSrU =r$ )PartitionAttentionLayeri  av  
Layer for partitioning the input tensor into non-overlapping windows and applying attention to each window.

Args:
    in_channels (int): Number of input channels.
    head_dim (int): Dimension of each attention head.
    partition_size (int): Size of the partitions.
    partition_type (str): Type of partitioning to use. Can be either "grid" or "window".
    grid_size (Tuple[int, int]): Size of the grid to partition the input tensor into.
    mlp_ratio (int): Ratio of the  feature size expansion in the MLP layer.
    activation_layer (Callable[..., nn.Module]): Activation function to use.
    norm_layer (Callable[..., nn.Module]): Normalization function to use.
    attention_dropout (float): Dropout probability for the attention layer.
    mlp_dropout (float): Dropout probability for the MLP layer.
    p_stochastic_dropout (float): Probability of dropping out a partition.
rD   r   partition_sizepartition_type	grid_size	mlp_ratiorH   .rI   attention_dropoutmlp_dropoutrJ   r   Nc           	        > [         TU ]  5         X-  U l        X l        US   U-  U l        X@l        XPl        US;  a  [        S5      eUS:X  a  X0R                  sU l        U l	        OU R                  UsU l        U l	        [        5       U l        [        5       U l        US:X  a  [        SS5      O[        R                   " 5       U l        US:X  a  [        SS5      O[        R                   " 5       U l        [        R&                  " U" U5      [)        XUS-  5      [        R*                  " U	5      5      U l        [        R&                  " [        R.                  " U5      [        R0                  " XU-  5      U" 5       [        R0                  " X-  U5      [        R*                  " U
5      5      U l        [5        US	S
9U l        g )Nr   )gridwindowz0partition_type must be either 'grid' or 'window'r   r   r    rN   rO   )rY   rZ   r   r   n_partitionsr   r   r   r   gr   partition_opr   departition_opr   r	   r_   partition_swapdepartition_swapr]   r}   Dropout
attn_layer	LayerNormr   	mlp_layerr   stochastic_dropout)rd   rD   r   r   r   r   r   rH   rI   r   r   rJ   ri   s               r#   rZ    PartitionAttentionLayer.__init__-  su   " 	". %aLN:,"!33OPPX%+->->NDFDF!..NDFDF+-/12@F2Jhr2.PRP[P[P]4Bf4LR 0RTR]R]R_--{# 1XYHYZJJ()
 LL%IIk#:;IIk-{;JJ{#
 #22FU"Sr%   rk   c                    U R                   S   U R                  -  U R                   S   U R                  -  p2[        R                  " U R                   S   U R                  -  S:H  =(       a    U R                   S   U R                  -  S:H  SR	                  U R                   U R                  5      5        U R                  XR                  5      nU R                  U5      nXR                  U R                  U5      5      -   nXR                  U R                  U5      5      -   nU R                  U5      nU R                  XR                  X#5      nU$ )z
Args:
    x (Tensor): Input tensor with expected layout of [B, C, H, W].
Returns:
    Tensor: Output tensor with expected layout of [B, C, H, W].
r   r!   z[Grid size must be divisible by partition size. Got grid size of {} and partition size of {})r   r   r5   _assertformatr   r   r   r   r   r   r   )rd   rk   ghgws       r#   ro   PartitionAttentionLayer.forwardg  s    "dff,dnnQ.?466.IBNN1&!+Oq0ADFF0Ja0Oipp	
 a("''(:;;''q(9::!!!$6622r%   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   )rr   rs   rt   ru   rv   r`   strtupler   r	   rx   rw   rZ   r
   ro   ry   rz   r{   s   @r#   r   r     s    "8T8T 8T
 8T 8T c?8T 8T #3		>28T S"))^,8T !8T 8T $8T  
!8Tt F  r%   r   c                      ^  \ rS rSrSrS\S\S\S\S\S\S	\R                  4   S
\S	\R                  4   S\S\S\S\S\S\S\
\\4   SS4U 4S jjrS\S\4S jrSrU =r$ )MaxVitLayeri  a  
MaxVit layer consisting of a MBConv layer followed by a PartitionAttentionLayer with `window` and a PartitionAttentionLayer with `grid`.

Args:
    in_channels (int): Number of input channels.
    out_channels (int): Number of output channels.
    expansion_ratio (float): Expansion ratio in the bottleneck.
    squeeze_ratio (float): Squeeze ratio in the SE Layer.
    stride (int): Stride of the depthwise convolution.
    activation_layer (Callable[..., nn.Module]): Activation function.
    norm_layer (Callable[..., nn.Module]): Normalization function.
    head_dim (int): Dimension of the attention heads.
    mlp_ratio (int): Ratio of the MLP layer.
    mlp_dropout (float): Dropout probability for the MLP layer.
    attention_dropout (float): Dropout probability for the attention layer.
    p_stochastic_dropout (float): Probability of stochastic depth.
    partition_size (int): Size of the partitions.
    grid_size (Tuple[int, int]): Size of the input feature grid.
rD   rE   rG   rF   r   rI   .rH   r   r   r   r   rJ   r   r   r   Nc                 "  > [         TU ]  5         [        5       n[        UUUUUUUUS9US'   [	        UUUSUU	U[
        R                  UU
US9US'   [	        UUUSUU	U[
        R                  UU
US9US'   [
        R                  " U5      U l        g )N)rD   rE   rF   rG   r   rH   rI   rJ   MBconvr   )rD   r   r   r   r   r   rH   rI   r   r   rJ   window_attentionr   grid_attention)	rY   rZ   r   rB   r   r	   r   r]   rc   )rd   rD   rE   rG   rF   r   rI   rH   r   r   r   r   rJ   r   r   rc   ri   s                   r#   rZ   MaxVitLayer.__init__  s    * 	)m "#%+'-!!5	
x &=$)#-||/#!5&
!" $;$)!-||/#!5$
  mmF+r%   rk   c                 (    U R                  U5      nU$ zu
Args:
    x (Tensor): Input tensor of shape (B, C, H, W).
Returns:
    Tensor: Output tensor of shape (B, C, H, W).
rc   )rd   rk   s     r#   ro   MaxVitLayer.forward  s     KKNr%   r
  )rr   rs   rt   ru   rv   r`   rw   r   r	   rx   r   rZ   r
   ro   ry   rz   r{   s   @r#   r  r    s    (?, ?, 	?,
 ?, ?, ?, S"))^,?, #3		>2?, ?, ?, ?, !?,  $!?,$ %?,& c?'?,( 
)?,B F  r%   r  c                      ^  \ rS rSrSrS\S\S\S\S\S\R                  4   S	\S\R                  4   S
\S\S\S\S\S\
\\4   S\S\\   SS4U 4S jjrS\S\4S jrSrU =r$ )MaxVitBlocki  a  
A MaxVit block consisting of `n_layers` MaxVit layers.

 Args:
    in_channels (int): Number of input channels.
    out_channels (int): Number of output channels.
    expansion_ratio (float): Expansion ratio in the bottleneck.
    squeeze_ratio (float): Squeeze ratio in the SE Layer.
    activation_layer (Callable[..., nn.Module]): Activation function.
    norm_layer (Callable[..., nn.Module]): Normalization function.
    head_dim (int): Dimension of the attention heads.
    mlp_ratio (int): Ratio of the MLP layer.
    mlp_dropout (float): Dropout probability for the MLP layer.
    attention_dropout (float): Dropout probability for the attention layer.
    p_stochastic_dropout (float): Probability of stochastic depth.
    partition_size (int): Size of the partitions.
    input_grid_size (Tuple[int, int]): Size of the input feature grid.
    n_layers (int): Number of layers in the block.
    p_stochastic (List[float]): List of probabilities for stochastic depth for each layer.
rD   rE   rG   rF   rI   .rH   r   r   r   r   r   input_grid_sizen_layersp_stochasticr   Nc                 p  > [         TU ]  5         [        U5      U:X  d  [        SU SU S35      e[        R
                  " 5       U l        [        USSSS9U l        [        U5       HL  u  nnUS:X  a  SOSnU =R                  [        US:X  a  UOUUUUUUUUUU	U
UU R                  US	9/-  sl        MN     g )
Nz'p_stochastic must have length n_layers=z, got p_stochastic=.r(   r    r!   rM   r   )rD   rE   rG   rF   r   rI   rH   r   r   r   r   r   r   rJ   )rY   rZ   lenr   r	   
ModuleListrc   r$   r   	enumerater  )rd   rD   rE   rG   rF   rI   rH   r   r   r   r   r   r  r  r  idxr   r   ri   s                     r#   rZ   MaxVitBlock.__init__  s    , 	< H,FxjPcdpcqqrsttmmo/QWXbcd-FC(QFKK/2ax\!-"/$3!)%5%' +&7#1"nn)* K .r%   rk   c                 <    U R                    H  nU" U5      nM     U$ r	  r
  )rd   rk   layers      r#   ro   MaxVitBlock.forward-  s      [[EaA !r%   )r   rc   )rr   rs   rt   ru   rv   r`   rw   r   r	   rx   r   listrZ   r
   ro   ry   rz   r{   s   @r#   r  r    s    *1 1 	1
 1 1 S"))^,1 #3		>21 1 1 1 !1  !1" sCx#1& '1( 5k)1* 
+1f	 	F 	 	r%   r  c            !         ^  \ rS rSrSrS\R                  SSSSSS4S\\\4   S	\S
\S\	\   S\	\   S\S\
S\\S\R                  4      S\S\R                  4   S\
S\
S\S\
S\
S\SS4 U 4S jjjrS\S\4S jrS rSrU =r$ )r   i9  a1  
Implements MaxVit Transformer from the `MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_ paper.
Args:
    input_size (Tuple[int, int]): Size of the input image.
    stem_channels (int): Number of channels in the stem.
    partition_size (int): Size of the partitions.
    block_channels (List[int]): Number of channels in each block.
    block_layers (List[int]): Number of layers in each block.
    stochastic_depth_prob (float): Probability of stochastic depth. Expands to a list of probabilities for each layer that scales linearly to the specified value.
    squeeze_ratio (float): Squeeze ratio in the SE Layer. Default: 0.25.
    expansion_ratio (float): Expansion ratio in the MBConv bottleneck. Default: 4.
    norm_layer (Callable[..., nn.Module]): Normalization function. Default: None (setting to None will produce a `BatchNorm2d(eps=1e-3, momentum=0.01)`).
    activation_layer (Callable[..., nn.Module]): Activation function Default: nn.GELU.
    head_dim (int): Dimension of the attention heads.
    mlp_ratio (int): Expansion ratio of the MLP layer. Default: 4.
    mlp_dropout (float): Dropout probability for the MLP layer. Default: 0.0.
    attention_dropout (float): Dropout probability for the attention layer. Default: 0.0.
    num_classes (int): Number of classes. Default: 1000.
Ng      ?r   rq   i  r   stem_channelsr   block_channelsblock_layersr   stochastic_depth_probrI   .rH   rG   rF   r   r   r   num_classesr   c                   > [         TU ]  5         [        U 5        SnUc  [        [        R
                  SSS9n[        U[        U5      5      n[        U5       H6  u  nnUS   U-  S:w  d  US   U-  S:w  d  M   [        SU SU S	U S
U S3	5      e   [        R                  " [        UUSSUU	SS S9[        X"SSS S SS95      U l        [        USSSS9nX0l        [        R                  " 5       U l        U/US S -   nUn["        R$                  " SU['        U5      5      R)                  5       nSn[+        UUU5       HZ  u  nnnU R                   R-                  [/        UUU
UUU	UUUUUUUUUUU-    S95        U R                   S   R0                  nUU-  nM\     [        R                  " [        R2                  " S5      [        R4                  " 5       [        R6                  " US   5      [        R8                  " US   US   5      [        R:                  " 5       [        R8                  " US   USS95      U l        U R?                  5         g )Nr(   gMbP?g{Gz?)epsmomentumr   r!   zInput size z
 of block z$ is not divisible by partition size zx. Consider changing the partition size or the input size.
Current configuration yields the following block input sizes: r  r    F)r   rI   rH   rL   rR   T)r   rI   rH   rL   rM   r4   )rD   rE   rG   rF   rI   rH   r   r   r   r   r   r  r  r  )rL   ) rY   rZ   r   r   r	   BatchNorm2dr.   r  r  r   r]   r   stemr$   r   r  blocksnplinspacer<   tolistzipr*   r  r   AdaptiveAvgPool2dFlattenr   r   Tanh
classifier_init_weights)rd   r   r  r   r  r  r   r   rI   rH   rG   rF   r   r   r   r!  input_channelsblock_input_sizesr  block_input_sizerD   rE   r  p_idx
in_channelout_channel
num_layersri   s                              r#   rZ   MaxVit.__init__N  s   : 	D!  TDIJ
 5Z^ATU%./@%A!C!"^3q8<LQ<OR`<`de<e !"2!3:cUBfgufv wUUfTgghj  &B MM %!1	 !ad]ahl
	" ,JAaYZ[
, mmo$os(;;%
 {{1&;S=NOVVX36{LR^3_/JZKK *!,"/$3)%5%' +&7#1$.'!-eej6H!I$ R22JZE) 4`0 --  #JJLLL+,IInR(.*<=GGIIInR(+EB
 	r%   rk   c                     U R                  U5      nU R                   H  nU" U5      nM     U R                  U5      nU$ r   )r&  r'  r/  )rd   rk   blocks      r#   ro   MaxVit.forward  s9    IIaL[[EaA !OOAr%   c                 (   U R                  5        GH}  n[        U[        R                  5      (       ab  [        R                  R                  UR                  SS9  UR                  b+  [        R                  R                  UR                  5        M  M  [        U[        R                  5      (       aV  [        R                  R                  UR                  S5        [        R                  R                  UR                  S5        M  [        U[        R                  5      (       d  GM  [        R                  R                  UR                  SS9  UR                  c  GMT  [        R                  R                  UR                  5        GM     g )Nr   r   r!   r   )modules
isinstancer	   r[   r   normal_weightrL   zeros_r%  	constant_r   )rd   ms     r#   r0  MaxVit._init_weights  s    A!RYY''d366%GGNN166* &Ar~~..!!!((A.!!!&&!,Aryy))d366%GGNN166*  r%   )r'  r/  r   r&  )rr   rs   rt   ru   rv   r	   GELUr   r`   r  rw   r   r   rx   rZ   r
   ro   r0  ry   rz   r{   s   @r#   r   r   9  s0   J :>57WW#!" #&7t #s(Ot
 t t S	t 3it t  %t" Xc299n56#t$ #3		>2%t( )t* +t. /t0 1t2 !3t6 7t8 
9t tl F + +r%   r   r  r  r  r   r   r   weightsprogresskwargsc                 d   Ube  [        US[        UR                  S   5      5        UR                  S   S   UR                  S   S   :X  d   e[        USUR                  S   5        UR                  SS5      n	[	        SU UUUUUU	S.UD6n
Ub  U
R                  UR                  US	S
95        U
$ )Nr!  
categoriesmin_sizer   r!   r      rM  )r  r  r  r   r   r   r   T)rG  
check_hashr"   )r   r  metapopr   load_state_dictget_state_dict)r  r  r  r   r   r   rF  rG  rH  r   models              r#   _maxvitrT    s    $ fmSl9S5TU||J'*gll:.Fq.IIIIflGLL4LML*5J 	#%!3%	 	E g44hSW4XYLr%   c                   f    \ rS rSr\" S\" \SS\R                  S9\	SSSSS	S
S.0SSSS.S9r
\
rSrg)r   i  z9https://download.pytorch.org/models/maxvit_t-bc5ab103.pthrM  )	crop_sizeresize_sizeinterpolationirL  zLhttps://github.com/pytorch/vision/tree/main/references/classification#maxvitzImageNet-1KgT@g|?5.X@)zacc@1zacc@5gZd;@gK7]@zThese weights reproduce closely the results of the paper using a similar training recipe.
            They were trained with a BatchNorm2D momentum of 0.99 instead of the more correct 0.01.)rJ  
num_paramsrK  recipe_metrics_ops
_file_size_docs)url
transformsrO  r"   N)rr   rs   rt   ru   r   r   r   r   BICUBICr   IMAGENET1K_V1DEFAULTry   r"   r%   r#   r   r     sb    G3CO`OhOh
 /""d##  !g
M. Gr%   r   
pretrained)rF  T)rF  rG  c                 \    [         R                  U 5      n [        SS/ SQ/ SQSSSU US.UD6$ )	aF  
Constructs a maxvit_t architecture from
`MaxViT: Multi-Axis Vision Transformer <https://arxiv.org/abs/2204.01697>`_.

Args:
    weights (:class:`~torchvision.models.MaxVit_T_Weights`, optional): The
        pretrained weights to use. See
        :class:`~torchvision.models.MaxVit_T_Weights` below for
        more details, and possible values. By default, no pre-trained
        weights are used.
    progress (bool, optional): If True, displays a progress bar of the
        download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.maxvit.MaxVit``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/maxvit.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.MaxVit_T_Weights
    :members:
@   )rf        i   )r    r    r   r        g?   )r  r  r  r   r   r   rF  rG  r"   )r   verifyrT  )rF  rG  rH  s      r#   r   r     sH    . %%g.G 
*!!
 
 
r%   )NF)=r   collectionsr   collections.abcr   	functoolsr   typingr   r   r   numpyr(  r5   torch.nn.functionalr	   
functionalr   r
   torchvision.models._apir   r   r   torchvision.models._metar   torchvision.models._utilsr   r   torchvision.ops.miscr   r    torchvision.ops.stochastic_depthr   torchvision.transforms._presetsr   r   torchvision.utilsr   __all__r   r`   r$   r  r.   r@   rx   rB   r}   r   r   r   r   r  r  r   rw   boolrT  r   rb  r   r"   r%   r#   <module>r|     s[    # $  * *      H H 9 T H < R 1uS#X S RU `c hmnqsvnvhw sCx C DQVWZ\_W_Q`La # #S #U\\ #TRYY TnF299 FR
ryy 
bii 4		 <ebii eP^")) ^BR")) Rj^+RYY ^+Z &*'' I	'
 s)' !' ' ' k"' ' '  !'T{ 6 ,0@0N0N!OP6:T !"23 !d !]` !ek ! Q !r%   