
    h                        S SK r S SKJr  S SKJr  S SKJr  S SKJrJ	r	J
r
  S SKrS SKrS SKJr  SSKJrJr  SSKJr  SS	KJr  S
SKJrJrJr  S
SKJr  S
SKJrJr  / SQr\ " S S5      5       r S\\!   S\!4S jr"S\RF                  S\!S\!S\$\RF                  \!4   4S jr%S\RF                  S\!S\!S\!S\RF                  4
S jr&\RN                  RQ                  S5        \RN                  RQ                  S5         " S S\RR                  5      r*S\RF                  S\!S\RF                  4S  jr+S!\RF                  S"\RF                  S#\$\!\!\!4   S$\$\!\!\!4   S%\RF                  S&\RF                  S'\RF                  S\RF                  4S( jr,S\RF                  S)\RF                  S*\-4S+ jr.\RN                  RQ                  S,5        \RN                  RQ                  S-5         " S. S/\RR                  5      r/ " S0 S1\RR                  5      r0 " S2 S3\RR                  5      r1 " S4 S5\RR                  5      r2S6\3\    S7\4S8\
\   S9\-S:\S\24S; jr5 " S< S=\5      r6 " S> S?\5      r7\" 5       \" S@\6Rp                  4SA9SSBSC.S8\
\6   S9\-S:\S\24SD jj5       5       r9\" 5       \" S@\7Rp                  4SA9SSBSC.S8\
\7   S9\-S:\S\24SE jj5       5       r:g)F    N)Sequence)	dataclass)partial)AnyCallableOptional   )MLPStochasticDepth)VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)MViTMViT_V1_B_Weights	mvit_v1_bMViT_V2_S_Weights	mvit_v2_sc                   t    \ rS rSr% \\S'   \\S'   \\S'   \\   \S'   \\   \S'   \\   \S'   \\   \S'   S	rg
)MSBlockConfig   	num_headsinput_channelsoutput_channelskernel_q	kernel_kvstride_q	stride_kv N)__name__
__module____qualname____firstlineno__int__annotations__list__static_attributes__r$       U/var/www/auris/envauris/lib/python3.13/site-packages/torchvision/models/video/mvit.pyr   r      s;    N3iCy3iCyr-   r   sreturnc                 $    SnU  H  nX-  nM	     U$ N   r$   )r/   productvs      r.   _prodr6   '   s    G Nr-   x
target_dim
expand_dimc                     U R                  5       nX1S-
  :X  a  U R                  U5      n X4$ X1:w  a  [        SU R                   35      eX4$ )Nr3   zUnsupported input dimension )dim	unsqueeze
ValueErrorshaper7   r8   r9   
tensor_dims       r.   
_unsqueezerA   .   sV    J!^#KK
# = 
	!7yABB=r-   r@   c                 8    X1S-
  :X  a  U R                  U5      n U $ r2   )squeezer?   s       r.   _squeezerD   7   s    !^#IIj!Hr-   rA   rD   c                      ^  \ rS rSr  SS\R
                  S\\R
                     S\\R
                     S\SS4
U 4S jjjrS	\	R                  S
\\\\4   S\\	R                  \\\\4   4   4S jrSrU =r$ )PoolA   Npoolnorm
activationnorm_before_poolr0   c                    > [         TU ]  5         Xl        / nUb  UR                  U5        Ub  UR                  U5        U(       a  [        R
                  " U6 OS U l        X@l        g )N)super__init__rH   appendnn
Sequentialnorm_actrK   )selfrH   rI   rJ   rK   layers	__class__s         r.   rN   Pool.__init__B   sX     		MM$!MM*%28v.d 0r-   r7   thwc                 ~   [        USS5      u  p[        R                  " USSS9u  pAUR                  SS5      nUR                  S S u  pVnUR                  XV-  U4U-   5      R                  5       nU R                  (       a  U R                  b  U R                  U5      nU R                  U5      nUR                  SS  u  pn
UR                  XVUS5      R                  SS5      n[        R                  " XA4SS9nU R                  (       d  U R                  b  U R                  U5      n[        USSU5      nXX44$ )	N   r3   )r3   r   )indicesr;   r	   r;   )rA   torchtensor_split	transposer>   reshape
contiguousrK   rR   rH   catrD   )rS   r7   rW   r@   class_tokenBNCTHWs              r.   forwardPool.forwardS   s!   "1a+ ++AtCKK1''"1+aIIquaj3&'224   T]]%>a A IIaL''!"+aIIaAr",,Q2II{&A.$$)Ba AQ1j)a)|r-   )rR   rK   rH   )NF)r%   r&   r'   r(   rP   Moduler   boolrN   r]   Tensortupler)   rj   r,   __classcell__rU   s   @r.   rF   rF   A   s    
 +/!&1ii1 ryy!1 RYY'	1
 1 
1 1" E#sC-,@ U5<<Y^_bdgil_lYmKmEn  r-   rF   	embeddingdc                     U R                   S   U:X  a  U $ [        R                  R                  U R	                  SS5      R                  S5      USS9R                  S5      R	                  SS5      $ )Nr   r3   linear)sizemode)r>   rP   
functionalinterpolatepermuter<   rC   )rr   rs   s     r.   _interpolater{   m   sn    qQ 	!!a#--a0 	" 	

 
	Ar-   attnqq_thwk_thw	rel_pos_h	rel_pos_w	rel_pos_tc                    Uu  pxn	Uu  pn[        S[        X5      -  S-
  5      n[        S[        X5      -  S-
  5      n[        S[        Xz5      -  S-
  5      n[        X-  S5      n[        X-  S5      n[        R                  " U5      S S 2S 4   U-  [        R                  " U5      S S S 24   SU-
  -   U-  -
  n[        X-  S5      n[        X-  S5      n[        R                  " U	5      S S 2S 4   U-  [        R                  " U5      S S S 24   SU-
  -   U-  -
  n[        X-  S5      n[        Xz-  S5      n[        R                  " U5      S S 2S 4   U-  [        R                  " U
5      S S S 24   SU
-
  -   U-  -
  n[	        XM5      n[	        X^5      n[	        Xo5      nUUR                  5          nUUR                  5          nUUR                  5          nUR                  u  nnnnUS S 2S S 2SS 24   R                  UUXxU	U5      n [        R                  " SU U5      n![        R                  " SU U5      n"U R                  SSSSSS	5      R                  UUU-  U-  U	-  U5      n [        R                  " U UR                  SS5      5      R                  SS5      n#U#R                  UUXXz5      R                  SSSSSS	5      n#U!S S 2S S 2S S 2S S 2S S 2S S S 2S 4   U"S S 2S S 2S S 2S S 2S S 2S S S S 24   -   U#S S 2S S 2S S 2S S 2S S 2S S 2S S 4   -   R                  UUXx-  U	-  X-  U-  5      n$U S S 2S S 2SS 2SS 24==   U$-  ss'   U $ )
Nr   r3         ?zbythwc,hkc->bythwkzbythwc,wkc->bythwkr   r	   rY      )r)   maxr]   aranger{   longr>   r`   einsumrz   matmulr_   view)%r|   r}   r~   r   r   r   r   q_tq_hq_wk_tk_hk_wdhdwdt	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_w	q_t_ratio	k_t_ratiodist_tRhRwRtrd   n_head_r;   r_qrel_h_qrel_w_qrel_q_trel_poss%                                        r.   _add_rel_posr   |   sS    MCcMCc	QS"	#B	QS"	#B	QS"	#B CIs#ICIs#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llFCIs#ICIs#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llFCIs#ICIs#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llF Y+IY+IY+I	6;;=	!B	6;;=	!B	6;;=	!BAvq#
Aq!"H+

a3
<Cll/b9Gll/b9G
++aAq!Q
'
/
/QZ#5E5KS
QCll3Q 23==aCGll1fc9AA!Q1aQRSG 	1aAtQ,-
!Q1atQ.
/	0
!Q1aD$.
/	0 gaS#)c/:	  	Aqr12'!Kr-   shortcutresidual_with_cls_embedc           	          U(       a  U R                  U5        U $ U S S 2S S 2SS 2S S 24==   US S 2S S 2SS 2S S 24   -  ss'   U $ r2   )add_)r7   r   r   s      r.   _add_shortcutr      sF    	x H 	
!QA+(1aQ;//Hr-   r   r   c                     ^  \ rS rSrS\R
                  4S\\   S\S\S\S\\   S\\   S	\\   S
\\   S\S\S\S\	S\
S\R                  4   SS4U 4S jjjrS\R                  S\\\\4   S\\R                  \\\\4   4   4S jrSrU =r$ )MultiscaleAttention           
input_size	embed_dim
output_dimr   r    r!   r"   r#   residual_poolr   rel_pos_embeddropout
norm_layer.r0   Nc                 *  > [         TU ]  5         X l        X0l        X@l        X4-  U l        S[        R                  " U R
                  5      -  U l        Xl	        Xl
        [        R                  " USU-  5      U l        [        R                  " X35      /nUS:  a$  UR                  [        R                  " USS95        [        R                   " U6 U l        S U l        ['        U5      S:  d  ['        U5      S:  au  U Vs/ s H  n[)        US-  5      PM     nn[+        [        R,                  " U R
                  U R
                  UUUU R
                  SS	9U" U R
                  5      5      U l        S U l        S U l        ['        U5      S:  d  ['        U5      S:  a  U Vs/ s H  n[)        US-  5      PM     nn[+        [        R,                  " U R
                  U R
                  UUUU R
                  SS	9U" U R
                  5      5      U l        [+        [        R,                  " U R
                  U R
                  UUUU R
                  SS	9U" U R
                  5      5      U l        S U l        S U l        S U l        U(       Ga  [9        USS  5      n[;        U5      S
:  a  UUS   -  OUn[;        U5      S
:  a  UUS   -  OUnS[9        UU5      -  S-
  nSUS
   -  S-
  n[        R<                  " [>        R@                  " UU R
                  5      5      U l        [        R<                  " [>        R@                  " UU R
                  5      5      U l        [        R<                  " [>        R@                  " UU R
                  5      5      U l        [        RB                  RE                  U R2                  SS9  [        RB                  RE                  U R4                  SS9  [        RB                  RE                  U R6                  SS9  g g s  snf s  snf )Nr   r	   r   Tinplacer3   r   F)stridepaddinggroupsbiasr   {Gz?std)#rM   rN   r   r   r   head_dimmathsqrtscalerr   r   rP   LinearqkvrO   DropoutrQ   projectpool_qr6   r)   rF   Conv3dpool_kpool_vr   r   r   r   len	Parameterr]   zerosinittrunc_normal_)rS   r   r   r   r   r    r!   r"   r#   r   r   r   r   r   rT   r}   	padding_qkv
padding_kvrv   q_sizekv_sizespatial_dimtemporal_dimrU   s                           r.   rN   MultiscaleAttention.__init__   s@     	"$""/DIIdmm44*'>$99YJ7#%99Z#D"ES=MM"**Wd;<}}f-+/?Q%/A"5.67hQ!VhI7		MMMM#%== 4==)DK ,0+/a5#3a#71:;2#bAg,J;		MMMM$&== 4==)DK 		MMMM$&== 4==)DK 261515z!"~&D,/MA,=TXa[(4F.1)nq.@dil*dGc&'22Q6Kz!},q0L\\%++k4==*QRDN\\%++k4==*QRDN\\%++lDMM*RSDNGG!!$..d!;GG!!$..d!;GG!!$..d!; ] 8" <s    P!Pr7   rW   c           	         UR                   u  p4nU R                  U5      R                  X4SU R                  U R                  5      R                  SS5      R                  SS9u  pgnU R                  b  U R                  Xr5      u  pyOUn	U R                  b  U R                  X5      S   nU R                  b  U R                  Xb5      u  pb[        R                  " U R                  U-  UR                  SS5      5      n
U R                  bI  U R                  b<  U R                  b/  [!        U
UUU	U R                  U R                  U R                  5      n
U
R#                  SS9n
[        R                  " X5      nU R$                  (       a  ['        XU R(                  5        UR                  SS5      R                  USU R*                  5      nU R-                  U5      nX4$ )Nr	   r3   r   r\   r   r[   )r>   r   r`   r   r   r_   unbindr   r   r   r]   r   r   r   r   r   r   softmaxr   r   r   r   r   )rS   r7   rW   rd   re   rf   r}   kr5   r   r|   s              r.   rj   MultiscaleAttention.forward!  s   ''a((1+%%aAt~~t}}MWWXY[\]ddijdka;;"{{1*HAuE;;"A#A&A;;"[[(FA||DKK!OQ[[A->?>>%$..*DIcD |||#LL!! < <=KK1%%aT__=LLOvr-   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r%   r&   r'   r(   rP   	LayerNormr+   r)   rm   floatr   rl   rN   r]   rn   ro   rj   r,   rp   rq   s   @r.   r   r      s!    /1||Z<IZ< Z< 	Z<
 Z< s)Z< 9Z< s)Z< 9Z< Z< "&Z< Z< Z< S"))^,Z< 
Z< Z<x   E#sC-,@  U5<<Y^_bdgil_lYmKmEn    r-   r   c                      ^  \ rS rSrSS\R
                  4S\\   S\S\	S\	S\	S\	S	\
S
\
S\S\R                  4   SS4U 4S jjjrS\R                  S\\\\4   S\\R                  \\\\4   4   4S jrSrU =r$ )MultiscaleBlockiD  r   r   cnfr   r   r   proj_after_attnr   stochastic_depth_probr   .r0   Nc
                   > [         TU ]  5         X`l        S U l        [	        UR
                  5      S:  ar  UR
                   V
s/ s H  oS:  a  U
S-   OU
PM     nn
U Vs/ s H  n[        US-  5      PM     nn[        [        R                  " XR
                  US9S 5      U l        U(       a  UR                  OUR                  nU	" UR                  5      U l        U	" U5      U l        [        U R                  [        R                  5      U l        [#        UUR                  UUR$                  UR&                  UR(                  UR
                  UR*                  UUUUU	S9U l        [/        USU-  UR                  /[        R0                  US S9U l        [5        US5      U l        S U l        UR                  UR                  :w  a1  [        R:                  " UR                  UR                  5      U l        g g s  sn
f s  snf )Nr3   r   )r   r   )	r    r!   r"   r#   r   r   r   r   r   rY   )activation_layerr   r   row)rM   rN   r   	pool_skipr6   r"   r)   rF   rP   	MaxPool3dr   r   norm1norm2
isinstanceBatchNorm1dneeds_transposalr   r   r    r!   r#   r|   r
   GELUmlpr   stochastic_depthr   r   )rS   r   r   r   r   r   r   r   r   r   r/   kernel_skipr   padding_skipattn_dimrU   s                  r.   rN   MultiscaleBlock.__init__E  s    	..2":=,,G,QE1q5q0,KG1<=ACQKL=![|TVZDN +:3&&s?Q?Q 2 23
)
 *4::r~~ F'MM\\mm\\mm''$;!
	 \3../WW
 !00Eu M,0!4!4499S%7%79L9LMDL 5M H=s   G= Hr7   rW   c                    U R                   (       a1  U R                  UR                  SS5      5      R                  SS5      OU R                  U5      nU R                  X25      u  pEU R                  b  U R
                  (       d  UOU R	                  U5      nU R                  c  UOU R                  X5      S   nX`R                  U5      -   nU R                   (       a1  U R                  UR                  SS5      5      R                  SS5      OU R                  U5      nU R                  b  U R
                  (       a  UOU R	                  U5      nXR                  U R                  U5      5      -   U4$ )Nr3   r   r   )
r   r   r_   r|   r   r   r   r   r   r   )	rS   r7   rW   x_norm1x_attnthw_newx_skipx_norm2x_projs	            r.   rj   MultiscaleBlock.forward  s#   CGCXCX$**Q[[A./99!Q?^b^h^hij^k))G1%T-A-AAt||T[G\nn,$..2H2K**622CGCXCX$**Q[[A./99!Q?^b^h^hij^kll*d.B.BU\H]--dhhw.?@@'IIr-   )	r|   r   r   r   r   r   r   r   r   )r%   r&   r'   r(   rP   r   r+   r)   r   rm   r   r   rl   rN   r]   rn   ro   rj   r,   rp   rq   s   @r.   r   r   D  s     '*/1||8NI8N 8N 	8N
 "&8N 8N 8N 8N  %8N S"))^,8N 
8N 8Nt
J 
JE#sC-,@ 
JU5<<Y^_bdgil_lYmKmEn 
J 
Jr-   r   c            
          ^  \ rS rSrS\S\\\4   S\S\SS4
U 4S jjrS	\R                  S\R                  4S
 jr
SrU =r$ )PositionalEncodingi  
embed_sizespatial_sizetemporal_sizer   r0   Nc                 >  > [         TU ]  5         X l        X0l        [        R
                  " [        R                  " U5      5      U l        S U l	        S U l
        S U l        U(       d  [        R
                  " [        R                  " U R                  S   U R                  S   -  U5      5      U l	        [        R
                  " [        R                  " U R                  U5      5      U l
        [        R
                  " [        R                  " U5      5      U l        g g )Nr   r3   )rM   rN   r  r  rP   r   r]   r   rc   spatial_postemporal_pos	class_pos)rS   r  r  r  r   rU   s        r.   rN   PositionalEncoding.__init__  s    (*<<J(?@374815!||EKK8I8I!8LtO`O`abOc8ceo,pqD "U[[9K9KZ-X YD\\%++j*ABDN r-   r7   c                    U R                   R                  UR                  S5      S5      R                  S5      n[        R
                  " X!4SS9nU R                  b  U R                  b  U R                  b  U R                  R                  u  p4[        R                  " U R                  USS9nUR                  U R                  R                  S5      R                  U R                  SS5      R                  SU5      5        [        R
                  " U R                  R                  S5      U4SS9R                  S5      nUR                  U5        U$ )Nr   r[   r3   r\   )rc   expandrv   r<   r]   rb   r  r  r	  r>   repeat_interleaver   r  r`   )rS   r7   rc   hw_sizer  pos_embeddings         r.   rj   PositionalEncoding.forward  s   &&--affQi<FFqIII{&A.'D,=,=,IdnnNh"&"2"2"8"8G!33D4E4EwTUVMt//99!<CCDDVDVXZ\^_gghjlvwx!IIt~~'?'?'BM&RXYZddefgMFF=!r-   )r	  rc   r  r  r  r  )r%   r&   r'   r(   r)   ro   rm   rN   r]   rn   rj   r,   rp   rq   s   @r.   r  r    s\    C3 CeCHo CVY Cjn Csw C %,,  r-   r  c            $       <  ^  \ rS rSr         SS\\\4   S\S\\   S\S\S\S	\S
\	S\	S\	S\S\
\S\R                  4      S\
\S\R                  4      S\\\\4   S\\\\4   S\\\\4   SS4"U 4S jjjrS\R                   S\R                   4S jrSrU =r$ )r   i  Nr  r  block_settingr   r   r   r   r   attention_dropoutr   num_classesblock.r   patch_embed_kernelpatch_embed_stridepatch_embed_paddingr0   c                 P  > [         TU ]  5         [        U 5        [        U5      nUS:X  a  [	        S5      eUc  [
        nUc  [        [        R                  SS9n[        R                  " SUS   R                  UUUS9U l        [        U4U-   U R                  R                  5       VVs/ s H  u  nnUU-  PM     nnn[        US   R                  US   US	   4US   US
9U l        [        R                   " 5       U l        [%        U5       H  u  nnU
U-  US-
  -  nU R"                  R'                  U" UUUUUUU	UUS9	5        [        UR(                  5      S:  d  MS  [        UUR(                  5       VVs/ s H  u  nnUU-  PM     nnnM     U" US   R*                  5      U l        [        R.                  " [        R0                  " USS9[        R2                  " US   R*                  U5      5      U l        U R7                  5        GH  n[9        U[        R2                  5      (       a  [        R:                  R=                  UR>                  SS9  [9        U[        R2                  5      (       a;  UR@                  b,  [        R:                  RC                  UR@                  S5        M  M  M  [9        U[        R                  5      (       at  UR>                  b*  [        R:                  RC                  UR>                  S5        UR@                  b-  [        R:                  RC                  UR@                  S5        GM7  GM:  [9        U[        5      (       d  GMR  URE                  5        H!  n[        R:                  R=                  USS9  M#     GM     gs  snnf s  snnf )a$  
MViT main class.

Args:
    spatial_size (tuple of ints): The spacial size of the input as ``(H, W)``.
    temporal_size (int): The temporal size ``T`` of the input.
    block_setting (sequence of MSBlockConfig): The Network structure.
    residual_pool (bool): If True, use MViTv2 pooling residual connection.
    residual_with_cls_embed (bool): If True, the addition on the residual connection will include
        the class embedding.
    rel_pos_embed (bool): If True, use MViTv2's relative positional embeddings.
    proj_after_attn (bool): If True, apply the projection after the attention.
    dropout (float): Dropout rate. Default: 0.0.
    attention_dropout (float): Attention dropout rate. Default: 0.0.
    stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
    num_classes (int): The number of classes.
    block (callable, optional): Module specifying the layer which consists of the attention and mlp.
    norm_layer (callable, optional): Module specifying the normalization layer to use.
    patch_embed_kernel (tuple of ints): The kernel of the convolution that patchifies the input.
    patch_embed_stride (tuple of ints): The stride of the convolution that patchifies the input.
    patch_embed_padding (tuple of ints): The padding of the convolution that patchifies the input.
r   z+The configuration parameter can't be empty.Ngư>)epsr	   )in_channelsout_channelskernel_sizer   r   r3   r   )r  r  r  r   r   )	r   r   r   r   r   r   r   r   r   r[   Tr   r   r   r   )#rM   rN   r   r   r=   r   r   rP   r   r   r   	conv_projzipr   r  pos_encoding
ModuleListblocks	enumeraterO   r"   r   rI   rQ   r   r   headmodulesr   r   r   weightr   	constant_
parameters)rS   r  r  r  r   r   r   r   r   r  r   r  r  r   r  r  r  total_stage_blocksrv   r   r   stage_block_idr   sd_probmweightsrU   s                             r.   rN   MViT.__init__  s   R 	
 	D! /"JKK=#E 48J &q)88*%'
 :=m=MP\=\^b^l^l^s^s9tu9tvdfn9t
u /$Q'66$Q-A7$Q-'	
 mmo#,]#;NC+n<@RUX@XYGKK)"/,C"/$3-*1)
 3<< 1$ADZQTQ]Q]A^_A^vdfnA^
_
' $<( }R0@@A	 MMJJw-IImB'77E
	
 A!RYY''%%ahhD%9a++0BGG%%affc2 1C+Ar||,,88'GG%%ahh466%GG%%affc2 &A122 ||~GGG))'t)<  .  Q v> `s   ,N;N"r7   c                    [        USS5      S   nU R                  U5      nUR                  S5      R                  SS5      nU R	                  U5      nU R                  R
                  4U R                  R                  -   nU R                   H  nU" X5      u  pM     U R                  U5      nUS S 2S4   nU R                  U5      nU$ )Nr   r   r   r3   )
rA   r  flattenr_   r   r  r  r"  rI   r$  )rS   r7   rW   r  s       r.   rj   MViT.forward"  s    q!Q"NN1IIaL""1a( a    ..043D3D3Q3QQ[[E1]FAs !IIaL adGIIaLr-   )r"  r  r$  rI   r   )	g      ?r   r   i  NN)r	      r2  )r   rY   rY   )r3   r	   r	   )r%   r&   r'   r(   ro   r)   r   r   rm   r   r   r   rP   rl   rN   r]   rn   rj   r,   rp   rq   s   @r.   r   r     sX    #&'*489=3<3<4=#v=CHov= v=  .	v=
 v= "&v= v= v= v= !v=  %v= v= bii01v= Xc299n56v= "#sC-0v=  "#sC-0!v=" #3S=1#v=$ 
%v= v=p %,,  r-   r   r  r   r-  progresskwargsc                 >   Ub  [        US[        UR                  S   5      5        UR                  S   S   UR                  S   S   :X  d   e[        USUR                  S   5        [        USUR                  S   5        UR                  SS	5      nUR                  SS
5      n[	        SUUU UR                  SS5      UR                  SS5      UR                  SS5      UR                  SS5      US.UD6nUb  UR                  UR                  USS95        U$ )Nr  
categoriesmin_sizer   r3   r  r  min_temporal_size   r:     r   Fr   Tr   r   )r  r  r  r   r   r   r   r   )r3  
check_hashr$   )r   r   metapopr   load_state_dictget_state_dict)r  r   r-  r3  r4  r  r  models           r.   _mvitrB  9  s$    fmSl9S5TU||J'*gll:.Fq.IIIIfngll:6NOfow||DW7XY::nj9LJJ3M 
!##jj%8 &

+Dd Kjj%8

#4e<3
 
E g44hSW4XYLr-   c                   V    \ rS rSr\" S\" \SSSSS9SS\S	S
SSSSS.0SSS.	S9r\r	Sr
g)r   iZ  z:https://download.pytorch.org/models/mvit_v1_b-dbeb1030.pthr9     ?rG  rG  ?rI  rI  	crop_sizeresize_sizemeanr   r;  zShttps://github.com/facebookresearch/pytorchvideo/blob/main/docs/source/model_zoo.mdThe weights were ported from the paper. The accuracies are estimated on video-level with parameters `frame_rate=7.5`, `clips_per_video=5`, and `clip_len=16`ip.Kinetics-400gJ+S@gh|?eW@zacc@1zacc@5guVQ@g rxa@	r7  r8  r6  recipe_docs
num_params_metrics_ops
_file_sizeurl
transformsr=  r$   Nr%   r&   r'   r(   r   r   r   r   KINETICS400_V1DEFAULTr,   r$   r-   r.   r   r   Z  sf    H #%
 #!#1k[ ###! !#
N: Gr-   r   c                   V    \ rS rSr\" S\" \SSSSS9SS\S	S
SSSSS.0SSS.	S9r\r	Sr
g)r   i{  z:https://download.pytorch.org/models/mvit_v2_s-ae3be167.pthr9  rD  rF  rH  rJ  r;  zChttps://github.com/facebookresearch/SlowFast/blob/main/MODEL_ZOO.mdrN  irO  g r0T@g(\W@rP  guVP@g?5^I|`@rQ  rX  r$   Nr[  r$   r-   r.   r   r   {  sf    H #%
 #!#1[[ ###! !#
N: Gr-   r   
pretrained)r-  T)r-  r3  c                 |   [         R                  U 5      n / SQ/ SQ/ SQ/ / SQ/ / SQ/ / / / / / / / / / / SQ/ // SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ// / SQ/ / SQ/ / / / / / / / / / / SQ/ // SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/S	.n/ n[        [        US
   5      5       HK  nUR	                  [        US
   U   US   U   US   U   US   U   US   U   US   U   US   U   S	95        MM     [        SSSUSSUR                  SS5      U US.UD6$ )aw  
Constructs a base MViTV1 architecture from
`Multiscale Vision Transformers <https://arxiv.org/abs/2104.11227>`__.

.. betastatus:: video module

Args:
    weights (:class:`~torchvision.models.video.MViT_V1_B_Weights`, optional): The
        pretrained weights to use. See
        :class:`~torchvision.models.video.MViT_V1_B_Weights` below for
        more details, and possible values. By default, no pre-trained
        weights are used.
    progress (bool, optional): If True, displays a progress bar of the
        download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.video.MViT``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/video/mvit.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.video.MViT_V1_B_Weights
    :members:
r3   r   r   rY   rY   rY   rY   rY   rY   rY   rY   rY   rY   rY      rb  `      re    rf  rf  rf  rf  rf  rf  rf  rf  rf  rf     rg  )re  re  rf  rf  rf  rf  rf  rf  rf  rf  rf  rf  rf  rg  rg  rg  r	   r	   r	   r3   r   r   r3   rb  rb  r3   rY   rY   r3   r3   r3   r   r   r   r    r!   r"   r#   r   r   r   r    r!   r"   r#   r9  r;  Fr   皙?)r  r  r  r   r   r   r-  r3  r$   )r   verifyranger   rO   r   rB  r>  r-  r3  r4  configr  is         r.   r   r     s   2  &&w/G FikB	2r2r2r2rSUWY[dfhi!
$ B	2r2r2r2rSUWY[dfhi!
1*FX M3vk*+, -a0%&67: &'8 9! <
+A. -a0
+A. -a0
	
 -  
# %$jj)@#F
 
 
r-   c                    [         R                  U 5      n / SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ// SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ// SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ// SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/S	.n/ n[        [        US
   5      5       HK  nUR	                  [        US
   U   US   U   US   U   US   U   US   U   US   U   US   U   S	95        MM     [        SSSUSSSSUR                  SS5      U US.
UD6$ )a  Constructs a small MViTV2 architecture from
`Multiscale Vision Transformers <https://arxiv.org/abs/2104.11227>`__ and
`MViTv2: Improved Multiscale Vision Transformers for Classification
and Detection <https://arxiv.org/abs/2112.01526>`__.

.. betastatus:: video module

Args:
    weights (:class:`~torchvision.models.video.MViT_V2_S_Weights`, optional): The
        pretrained weights to use. See
        :class:`~torchvision.models.video.MViT_V2_S_Weights` below for
        more details, and possible values. By default, no pre-trained
        weights are used.
    progress (bool, optional): If True, displays a progress bar of the
        download to stderr. Default is True.
    **kwargs: parameters passed to the ``torchvision.models.video.MViT``
        base class. Please refer to the `source code
        <https://github.com/pytorch/vision/blob/main/torchvision/models/video/mvit.py>`_
        for more details about this class.

.. autoclass:: torchvision.models.video.MViT_V2_S_Weights
        :members:
ra  )rd  rd  re  re  rf  rf  rf  rf  rf  rf  rf  rf  rf  rf  rf  rg  rc  rh  rl  ri  rj  rk  rm  r   r   r   r    r!   r"   r#   r9  r;  TFr   rn  )
r  r  r  r   r   r   r   r   r-  r3  r$   )r   ro  rp  r   rO   r   rB  r>  rq  s         r.   r   r     s   4  &&w/G Fhj!
& !
& !
& !
uLF\ M3vk*+, -a0%&67: &'8 9! <
+A. -a0
+A. -a0
	
 -  # %$jj)@#F  r-   );r   collections.abcr   dataclassesr   	functoolsr   typingr   r   r   r]   torch.fxtorch.nnrP   opsr
   r   transforms._presetsr   utilsr   _apir   r   r   _metar   _utilsr   r   __all__r   r)   r6   rn   ro   rA   rD   fxwraprl   rF   r{   r   rm   r   r   r   r  r   r+   r   rB  r   r   r\  r   r   r$   r-   r.   <module>r     s[    $ !  * *    ' 6 ( 7 7 + C   Xc] s %,, C S U5<<Y\K\E]  # 3 C TYT`T`  l  j )299 )XELL S U\\ 9
,,9||9 c39 c3	9
 ||9 ||9 ||9 \\9xU\\ U\\ TX  n  o }")) }@EJbii EJP :M299 M`&  k" 	
  
B B B ,0A0P0P!QR8<t ](#45 ] ]_b ]gk ] S ]@ ,0A0P0P!QR8<t B(#45 B B_b Bgk B S Br-   