
import math
from collections import OrderedDict
from functools import partial
from typing import Any, Callable, NamedTuple, Optional

import torch
import torch.nn as nn

from ..ops.misc import Conv2dNormActivation, MLP
from ..transforms._presets import ImageClassification, InterpolationMode
from ..utils import _log_api_usage_once
from ._api import register_model, Weights, WeightsEnum
from ._meta import _IMAGENET_CATEGORIES
from ._utils import _ovewrite_named_param, handle_legacy_interface


__all__ = [
    "VisionTransformer",
    "ViT_B_16_Weights",
    "ViT_B_32_Weights",
    "ViT_L_16_Weights",
    "ViT_L_32_Weights",
    "ViT_H_14_Weights",
    "vit_b_16",
    "vit_b_32",
    "vit_l_16",
    "vit_l_32",
    "vit_h_14",
]

class ConvStemConfig(NamedTuple):
    out_channels: int
    kernel_size: int
    stride: int
    norm_layer: Callable[..., nn.Module] = nn.BatchNorm2d
    activation_layer: Callable[..., nn.Module] = nn.ReLU

class MLPBlock(MLP):
    """Transformer MLP block."""

    _version = 2

    def __init__(self, in_dim: int, mlp_dim: int, dropout: float):
        super().__init__(in_dim, [mlp_dim, in_dim], activation_layer=nn.GELU, inplace=None, dropout=dropout)

        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.normal_(m.bias, std=1e-6)

    def _load_from_state_dict(
        self,
        state_dict,
        prefix,
        local_metadata,
        strict,
        missing_keys,
        unexpected_keys,
        error_msgs,
    ):
        version = local_metadata.get("version", None)

        if version is None or version < 2:
            # Remap the legacy ``linear_1``/``linear_2`` parameter names onto the
            # positional sub-module names used by the underlying MLP container.
            for i in range(2):
                for type in ["weight", "bias"]:
                    old_key = f"{prefix}linear_{i+1}.{type}"
                    new_key = f"{prefix}{3*i}.{type}"
                    if old_key in state_dict:
                        state_dict[new_key] = state_dict.pop(old_key)

        super()._load_from_state_dict(
            state_dict,
            prefix,
            local_metadata,
            strict,
            missing_keys,
            unexpected_keys,
            error_msgs,
        )


class EncoderBlock(nn.Module):
    """Transformer encoder block."""

    def __init__(
        self,
        num_heads: int,
        hidden_dim: int,
        mlp_dim: int,
        dropout: float,
        attention_dropout: float,
        norm_layer: Callable[..., torch.nn.Module] = partial(nn.LayerNorm, eps=1e-6),
    ):
        super().__init__()
        self.num_heads = num_heads

        # Attention block
        self.ln_1 = norm_layer(hidden_dim)
        self.self_attention = nn.MultiheadAttention(hidden_dim, num_heads, dropout=attention_dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)

        # MLP block
        self.ln_2 = norm_layer(hidden_dim)
        self.mlp = MLPBlock(hidden_dim, mlp_dim, dropout)

    def forward(self, input: torch.Tensor):
        torch._assert(input.dim() == 3, f"Expected (batch_size, seq_length, hidden_dim) got {input.shape}")
        x = self.ln_1(input)
        x, _ = self.self_attention(x, x, x, need_weights=False)
        x = self.dropout(x)
        x = x + input

        y = self.ln_2(x)
        y = self.mlp(y)
        return x + y


class Encoder(nn.Module):
    """Transformer Model Encoder for sequence to sequence translation."""

    def __init__(
        self,
        seq_length: int,
        num_layers: int,
        num_heads: int,
        hidden_dim: int,
        mlp_dim: int,
        dropout: float,
        attention_dropout: float,
        norm_layer: Callable[..., torch.nn.Module] = partial(nn.LayerNorm, eps=1e-6),
    ):
        super().__init__()
        # The learnt positional embedding covers the class token plus every patch token.
        self.pos_embedding = nn.Parameter(torch.empty(1, seq_length, hidden_dim).normal_(std=0.02))
        self.dropout = nn.Dropout(dropout)
        layers: OrderedDict[str, nn.Module] = OrderedDict()
        for i in range(num_layers):
            layers[f"encoder_layer_{i}"] = EncoderBlock(
                num_heads,
                hidden_dim,
                mlp_dim,
                dropout,
                attention_dropout,
                norm_layer,
            )
        self.layers = nn.Sequential(layers)
        self.ln = norm_layer(hidden_dim)

    def forward(self, input: torch.Tensor):
        torch._assert(input.dim() == 3, f"Expected (batch_size, seq_length, hidden_dim) got {input.shape}")
        input = input + self.pos_embedding
        return self.ln(self.layers(self.dropout(input)))


class VisionTransformer(nn.Module):
    """Vision Transformer as per https://arxiv.org/abs/2010.11929."""

    def __init__(
        self,
        image_size: int,
        patch_size: int,
        num_layers: int,
        num_heads: int,
        hidden_dim: int,
        mlp_dim: int,
        dropout: float = 0.0,
        attention_dropout: float = 0.0,
        num_classes: int = 1000,
        representation_size: Optional[int] = None,
        norm_layer: Callable[..., torch.nn.Module] = partial(nn.LayerNorm, eps=1e-6),
        conv_stem_configs: Optional[list[ConvStemConfig]] = None,
    ):
        super().__init__()
        _log_api_usage_once(self)
        torch._assert(image_size % patch_size == 0, "Input shape indivisible by patch size!")
        self.image_size = image_size
        self.patch_size = patch_size
        self.hidden_dim = hidden_dim
        self.mlp_dim = mlp_dim
        self.attention_dropout = attention_dropout
        self.dropout = dropout
        self.num_classes = num_classes
        self.representation_size = representation_size
        self.norm_layer = norm_layer

        if conv_stem_configs is not None:
            # Replace the single patchify convolution with a small convolutional stem.
            seq_proj = nn.Sequential()
            prev_channels = 3
            for i, conv_stem_layer_config in enumerate(conv_stem_configs):
                seq_proj.add_module(
                    f"conv_bn_relu_{i}",
                    Conv2dNormActivation(
                        in_channels=prev_channels,
                        out_channels=conv_stem_layer_config.out_channels,
                        kernel_size=conv_stem_layer_config.kernel_size,
                        stride=conv_stem_layer_config.stride,
                        norm_layer=conv_stem_layer_config.norm_layer,
                        activation_layer=conv_stem_layer_config.activation_layer,
                    ),
                )
                prev_channels = conv_stem_layer_config.out_channels
            seq_proj.add_module(
                "conv_last", nn.Conv2d(in_channels=prev_channels, out_channels=hidden_dim, kernel_size=1)
            )
            self.conv_proj: nn.Module = seq_proj
        else:
            self.conv_proj = nn.Conv2d(
                in_channels=3, out_channels=hidden_dim, kernel_size=patch_size, stride=patch_size
            )

        seq_length = (image_size // patch_size) ** 2

        # Add a class token
        self.class_token = nn.Parameter(torch.zeros(1, 1, hidden_dim))
        seq_length += 1

        self.encoder = Encoder(
            seq_length,
            num_layers,
            num_heads,
            hidden_dim,
            mlp_dim,
            dropout,
            attention_dropout,
            norm_layer,
        )
        self.seq_length = seq_length

        heads_layers: OrderedDict[str, nn.Module] = OrderedDict()
        if representation_size is None:
            heads_layers["head"] = nn.Linear(hidden_dim, num_classes)
        else:
            heads_layers["pre_logits"] = nn.Linear(hidden_dim, representation_size)
            heads_layers["act"] = nn.Tanh()
            heads_layers["head"] = nn.Linear(representation_size, num_classes)

        self.heads = nn.Sequential(heads_layers)

        if isinstance(self.conv_proj, nn.Conv2d):
            # Init the patchify stem
            fan_in = self.conv_proj.in_channels * self.conv_proj.kernel_size[0] * self.conv_proj.kernel_size[1]
            nn.init.trunc_normal_(self.conv_proj.weight, std=math.sqrt(1 / fan_in))
            if self.conv_proj.bias is not None:
                nn.init.zeros_(self.conv_proj.bias)
        elif self.conv_proj.conv_last is not None and isinstance(self.conv_proj.conv_last, nn.Conv2d):
            # Init the last 1x1 conv of the conv stem
            nn.init.normal_(
                self.conv_proj.conv_last.weight, mean=0.0, std=math.sqrt(2.0 / self.conv_proj.conv_last.out_channels)
            )
            if self.conv_proj.conv_last.bias is not None:
                nn.init.zeros_(self.conv_proj.conv_last.bias)

        if hasattr(self.heads, "pre_logits") and isinstance(self.heads.pre_logits, nn.Linear):
            fan_in = self.heads.pre_logits.in_features
            nn.init.trunc_normal_(self.heads.pre_logits.weight, std=math.sqrt(1 / fan_in))
            nn.init.zeros_(self.heads.pre_logits.bias)

        if isinstance(self.heads.head, nn.Linear):
            nn.init.zeros_(self.heads.head.weight)
            nn.init.zeros_(self.heads.head.bias)

    def _process_input(self, x: torch.Tensor) -> torch.Tensor:
        n, c, h, w = x.shape
        p = self.patch_size
        torch._assert(h == self.image_size, f"Wrong image height! Expected {self.image_size} but got {h}!")
        torch._assert(w == self.image_size, f"Wrong image width! Expected {self.image_size} but got {w}!")
        n_h = h // p
        n_w = w // p

        # (n, c, h, w) -> (n, hidden_dim, n_h, n_w)
        x = self.conv_proj(x)
        # (n, hidden_dim, n_h, n_w) -> (n, hidden_dim, n_h * n_w)
        x = x.reshape(n, self.hidden_dim, n_h * n_w)

        # (n, hidden_dim, n_h * n_w) -> (n, n_h * n_w, hidden_dim)
        # The self attention layer expects inputs in the format (N, S, E)
        # where S is the sequence length, N the batch size and E the embedding dimension.
        x = x.permute(0, 2, 1)

        return x

    def forward(self, x: torch.Tensor):
        # Reshape and permute the input tensor into a sequence of patch embeddings.
        x = self._process_input(x)
        n = x.shape[0]

        # Expand the class token to the full batch.
        batch_class_token = self.class_token.expand(n, -1, -1)
        x = torch.cat([batch_class_token, x], dim=1)

        x = self.encoder(x)

        # Classifier "token" as used by standard language architectures.
        x = x[:, 0]

        x = self.heads(x)

        return x


def _vision_transformer(
    patch_size: int,
    num_layers: int,
    num_heads: int,
    hidden_dim: int,
    mlp_dim: int,
    weights: Optional[WeightsEnum],
    progress: bool,
    **kwargs: Any,
) -> VisionTransformer:
    if weights is not None:
        _ovewrite_named_param(kwargs, "num_classes", len(weights.meta["categories"]))
        assert weights.meta["min_size"][0] == weights.meta["min_size"][1]
        _ovewrite_named_param(kwargs, "image_size", weights.meta["min_size"][0])
    image_size = kwargs.pop("image_size", 224)

    model = VisionTransformer(
        image_size=image_size,
        patch_size=patch_size,
        num_layers=num_layers,
        num_heads=num_heads,
        hidden_dim=hidden_dim,
        mlp_dim=mlp_dim,
        **kwargs,
    )

    if weights:
        model.load_state_dict(weights.get_state_dict(progress=progress, check_hash=True))

    return model


_COMMON_META: dict[str, Any] = {
    "categories": _IMAGENET_CATEGORIES,
}

_COMMON_SWAG_META = {
    **_COMMON_META,
    "recipe": "https://github.com/facebookresearch/SWAG",
    "license": "https://github.com/facebookresearch/SWAG/blob/main/LICENSE",
}

class ViT_B_16_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/vit_b_16-c867db91.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            **_COMMON_META,
            "num_params": 86567656,
            "min_size": (224, 224),
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_16",
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 81.072,
                    "acc@5": 95.318,
                }
            },
            "_docs": """
                These weights were trained from scratch by using a modified version of `DeIT
                <https://arxiv.org/abs/2012.12877>`_'s training recipe.
            """,
        },
    )
    IMAGENET1K_SWAG_E2E_V1 = Weights(
        url="https://download.pytorch.org/models/vit_b_16_swag-9ac1b537.pth",
        transforms=partial(
            ImageClassification,
            crop_size=384,
            resize_size=384,
            interpolation=InterpolationMode.BICUBIC,
        ),
        meta={
            **_COMMON_SWAG_META,
            "num_params": 86859496,
            "min_size": (384, 384),
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 85.304,
                    "acc@5": 97.650,
                }
            },
            "_docs": """
                These weights are learnt via transfer learning by end-to-end fine-tuning the original
                `SWAG <https://arxiv.org/abs/2201.08371>`_ weights on ImageNet-1K data.
            """,
        },
    )
    IMAGENET1K_SWAG_LINEAR_V1 = Weights(
        url="https://download.pytorch.org/models/vit_b_16_lc_swag-4e70ced5.pth",
        transforms=partial(
            ImageClassification,
            crop_size=224,
            resize_size=224,
            interpolation=InterpolationMode.BICUBIC,
        ),
        meta={
            **_COMMON_SWAG_META,
            "recipe": "https://github.com/pytorch/vision/pull/5793",
            "num_params": 86567656,
            "min_size": (224, 224),
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 81.886,
                    "acc@5": 96.180,
                }
            },
            "_docs": """
                These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
                weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V1


class ViT_B_32_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/vit_b_32-d86f8d99.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            **_COMMON_META,
            "num_params": 88224232,
            "min_size": (224, 224),
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_b_32",
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 75.912,
                    "acc@5": 92.466,
                }
            },
            "_docs": """
                These weights were trained from scratch by using a modified version of `DeIT
                <https://arxiv.org/abs/2012.12877>`_'s training recipe.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V1


class ViT_L_16_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/vit_l_16-852ce7e3.pth",
        transforms=partial(ImageClassification, crop_size=224, resize_size=242),
        meta={
            **_COMMON_META,
            "num_params": 304326632,
            "min_size": (224, 224),
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_16",
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 79.662,
                    "acc@5": 94.638,
                }
            },
            "_docs": """
                These weights were trained from scratch by using a modified version of TorchVision's
                `new training recipe
                <https://pytorch.org/blog/how-to-train-state-of-the-art-models-using-torchvision-latest-primitives/>`_.
            """,
        },
    )
    IMAGENET1K_SWAG_E2E_V1 = Weights(
        url="https://download.pytorch.org/models/vit_l_16_swag-4f3808c9.pth",
        transforms=partial(
            ImageClassification,
            crop_size=512,
            resize_size=512,
            interpolation=InterpolationMode.BICUBIC,
        ),
        meta={
            **_COMMON_SWAG_META,
            "num_params": 305174504,
            "min_size": (512, 512),
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 88.064,
                    "acc@5": 98.512,
                }
            },
            "_docs": """
                These weights are learnt via transfer learning by end-to-end fine-tuning the original
                `SWAG <https://arxiv.org/abs/2201.08371>`_ weights on ImageNet-1K data.
            """,
        },
    )
    IMAGENET1K_SWAG_LINEAR_V1 = Weights(
        url="https://download.pytorch.org/models/vit_l_16_lc_swag-4d563306.pth",
        transforms=partial(
            ImageClassification,
            crop_size=224,
            resize_size=224,
            interpolation=InterpolationMode.BICUBIC,
        ),
        meta={
            **_COMMON_SWAG_META,
            "recipe": "https://github.com/pytorch/vision/pull/5793",
            "num_params": 304326632,
            "min_size": (224, 224),
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 85.146,
                    "acc@5": 97.422,
                }
            },
            "_docs": """
                These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
                weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V1


class ViT_L_32_Weights(WeightsEnum):
    IMAGENET1K_V1 = Weights(
        url="https://download.pytorch.org/models/vit_l_32-c7638314.pth",
        transforms=partial(ImageClassification, crop_size=224),
        meta={
            **_COMMON_META,
            "num_params": 306535400,
            "min_size": (224, 224),
            "recipe": "https://github.com/pytorch/vision/tree/main/references/classification#vit_l_32",
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 76.972,
                    "acc@5": 93.07,
                }
            },
            "_docs": """
                These weights were trained from scratch by using a modified version of `DeIT
                <https://arxiv.org/abs/2012.12877>`_'s training recipe.
            """,
        },
    )
    DEFAULT = IMAGENET1K_V1


class ViT_H_14_Weights(WeightsEnum):
    IMAGENET1K_SWAG_E2E_V1 = Weights(
        url="https://download.pytorch.org/models/vit_h_14_swag-80465313.pth",
        transforms=partial(
            ImageClassification,
            crop_size=518,
            resize_size=518,
            interpolation=InterpolationMode.BICUBIC,
        ),
        meta={
            **_COMMON_SWAG_META,
            "num_params": 633470440,
            "min_size": (518, 518),
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 88.552,
                    "acc@5": 98.694,
                }
            },
            "_docs": """
                These weights are learnt via transfer learning by end-to-end fine-tuning the original
                `SWAG <https://arxiv.org/abs/2201.08371>`_ weights on ImageNet-1K data.
            """,
        },
    )
    IMAGENET1K_SWAG_LINEAR_V1 = Weights(
        url="https://download.pytorch.org/models/vit_h_14_lc_swag-c1eb923e.pth",
        transforms=partial(
            ImageClassification,
            crop_size=224,
            resize_size=224,
            interpolation=InterpolationMode.BICUBIC,
        ),
        meta={
            **_COMMON_SWAG_META,
            "recipe": "https://github.com/pytorch/vision/pull/5793",
            "num_params": 632045800,
            "min_size": (224, 224),
            "_metrics": {
                "ImageNet-1K": {
                    "acc@1": 85.708,
                    "acc@5": 97.730,
                }
            },
            "_docs": """
                These weights are composed of the original frozen `SWAG <https://arxiv.org/abs/2201.08371>`_ trunk
                weights and a linear classifier learnt on top of them trained on ImageNet-1K data.
            """,
        },
    )
    DEFAULT = IMAGENET1K_SWAG_E2E_V1


@register_model()
@handle_legacy_interface(weights=("pretrained", ViT_B_16_Weights.IMAGENET1K_V1))
def vit_b_16(*, weights: Optional[ViT_B_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
    """
Constructs a vit_b_16 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_B_16_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_B_16_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_B_16_Weights
    :members:
    """
    weights = ViT_B_16_Weights.verify(weights)

    return _vision_transformer(
        patch_size=16,
        num_layers=12,
        num_heads=12,
        hidden_dim=768,
        mlp_dim=3072,
        weights=weights,
        progress=progress,
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", ViT_B_32_Weights.IMAGENET1K_V1))
def vit_b_32(*, weights: Optional[ViT_B_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
    """
Constructs a vit_b_32 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_B_32_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_B_32_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_B_32_Weights
    :members:
    """
    weights = ViT_B_32_Weights.verify(weights)

    return _vision_transformer(
        patch_size=32,
        num_layers=12,
        num_heads=12,
        hidden_dim=768,
        mlp_dim=3072,
        weights=weights,
        progress=progress,
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", ViT_L_16_Weights.IMAGENET1K_V1))
def vit_l_16(*, weights: Optional[ViT_L_16_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
    """
Constructs a vit_l_16 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_L_16_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_L_16_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_L_16_Weights
    :members:
    """
    weights = ViT_L_16_Weights.verify(weights)

    return _vision_transformer(
        patch_size=16,
        num_layers=24,
        num_heads=16,
        hidden_dim=1024,
        mlp_dim=4096,
        weights=weights,
        progress=progress,
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", ViT_L_32_Weights.IMAGENET1K_V1))
def vit_l_32(*, weights: Optional[ViT_L_32_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
    """
Constructs a vit_l_32 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_L_32_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_L_32_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_L_32_Weights
    :members:
    """
    weights = ViT_L_32_Weights.verify(weights)

    return _vision_transformer(
        patch_size=32,
        num_layers=24,
        num_heads=16,
        hidden_dim=1024,
        mlp_dim=4096,
        weights=weights,
        progress=progress,
        **kwargs,
    )


@register_model()
@handle_legacy_interface(weights=("pretrained", None))
def vit_h_14(*, weights: Optional[ViT_H_14_Weights] = None, progress: bool = True, **kwargs: Any) -> VisionTransformer:
    """
Constructs a vit_h_14 architecture from
`An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale <https://arxiv.org/abs/2010.11929>`_.

Args:
    weights (:class:`~torchvision.models.ViT_H_14_Weights`, optional): The pretrained
        weights to use. See :class:`~torchvision.models.ViT_H_14_Weights`
        below for more details and possible values. By default, no pre-trained weights are used.
    progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.vision_transformer.VisionTransformer``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/vision_transformer.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.ViT_H_14_Weights
    :members:
    """
    weights = ViT_H_14_Weights.verify(weights)

    return _vision_transformer(
        patch_size=14,
        num_layers=32,
        num_heads=16,
        hidden_dim=1280,
        mlp_dim=5120,
        weights=weights,
        progress=progress,
        **kwargs,
    )


def interpolate_embeddings(
    image_size: int,
    patch_size: int,
    model_state: "OrderedDict[str, torch.Tensor]",
    interpolation_mode: str = "bicubic",
    reset_heads: bool = False,
) -> "OrderedDict[str, torch.Tensor]":
    """This function helps interpolate positional embeddings during checkpoint loading,
especially when you want to apply a pre-trained model on images with different resolution.

Args:
    image_size (int): Image size of the new model.
    patch_size (int): Patch size of the new model.
    model_state (OrderedDict[str, torch.Tensor]): State dict of the pre-trained model.
    interpolation_mode (str): The algorithm used for upsampling. Default: bicubic.
    reset_heads (bool): If true, not copying the state of heads. Default: False.

Returns:
        OrderedDict[str, torch.Tensor]: A state dict which can be loaded into the new model.
    """
    # Shape of pos_embedding is (1, seq_length, hidden_dim)
    pos_embedding = model_state["encoder.pos_embedding"]
    n, seq_length, hidden_dim = pos_embedding.shape
    if n != 1:
        raise ValueError(f"Unexpected position embedding shape: {pos_embedding.shape}")

    new_seq_length = (image_size // patch_size) ** 2 + 1

    # Interpolate only when the checkpoint and the new model disagree on sequence length.
    if new_seq_length != seq_length:
        # The class token embedding shouldn't be interpolated, so we split it off first.
        seq_length -= 1
        new_seq_length -= 1
        pos_embedding_token = pos_embedding[:, :1, :]
        pos_embedding_img = pos_embedding[:, 1:, :]

        # (1, seq_length, hidden_dim) -> (1, hidden_dim, seq_length)
        pos_embedding_img = pos_embedding_img.permute(0, 2, 1)
        seq_length_1d = int(math.sqrt(seq_length))
        if seq_length_1d * seq_length_1d != seq_length:
            raise ValueError(
                f"seq_length is not a perfect square! Instead got seq_length_1d * seq_length_1d = "
                f"{seq_length_1d * seq_length_1d} and seq_length = {seq_length}"
            )

        # (1, hidden_dim, seq_length) -> (1, hidden_dim, seq_length_1d, seq_length_1d)
        pos_embedding_img = pos_embedding_img.reshape(1, hidden_dim, seq_length_1d, seq_length_1d)
        new_seq_length_1d = image_size // patch_size

        # Resample the patch grid of positional embeddings to the new spatial resolution.
        new_pos_embedding_img = nn.functional.interpolate(
            pos_embedding_img,
            size=new_seq_length_1d,
            mode=interpolation_mode,
            align_corners=True,
        )

        # (1, hidden_dim, new_seq_length_1d, new_seq_length_1d) -> (1, hidden_dim, new_seq_length)
        new_pos_embedding_img = new_pos_embedding_img.reshape(1, hidden_dim, new_seq_length)

        # (1, hidden_dim, new_seq_length) -> (1, new_seq_length, hidden_dim)
        new_pos_embedding_img = new_pos_embedding_img.permute(0, 2, 1)
        new_pos_embedding = torch.cat([pos_embedding_token, new_pos_embedding_img], dim=1)
        model_state["encoder.pos_embedding"] = new_pos_embedding

        if reset_heads:
            # Drop the classification heads so they get re-initialized by the new model.
            model_state_copy: "OrderedDict[str, torch.Tensor]" = OrderedDict()
            for k, v in model_state.items():
                if not k.startswith("heads"):
                    model_state_copy[k] = v
            model_state = model_state_copy

    return model_state
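

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the public torchvision
# API): build a ViT-B/16, run a forward pass, then reuse interpolate_embeddings
# to adapt its 224x224 positional embeddings to a hypothetical 384x384 variant.
if __name__ == "__main__":
    model = vit_b_16(weights=None)  # pass ViT_B_16_Weights.DEFAULT to download pretrained weights
    model.eval()
    with torch.no_grad():
        logits = model(torch.randn(1, 3, 224, 224))
    print(logits.shape)  # torch.Size([1, 1000])

    # Resample the positional embeddings so this state dict fits a 384x384 model.
    resized_state = interpolate_embeddings(
        image_size=384,
        patch_size=16,
        model_state=OrderedDict(model.state_dict()),
    )
    larger_model = VisionTransformer(
        image_size=384, patch_size=16, num_layers=12, num_heads=12, hidden_dim=768, mlp_dim=3072
    )
    larger_model.load_state_dict(resized_state)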