
    kh                        d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlZd dlZd dlmZ ddlmZmZ ddlmZ ddlmZ d	d
lmZmZmZ d	dlmZ d	dlmZm Z  g dZ!e G d d             Z"dee#   de#fdZ$dejJ                  de#de#deejJ                  e#f   fdZ&dejJ                  de#de#de#dejJ                  f
dZ'ejP                  jS                  d       ejP                  jS                  d        G d dejT                        Z+dejJ                  de#dejJ                  fdZ,d ejJ                  d!ejJ                  d"ee#e#e#f   d#ee#e#e#f   d$ejJ                  d%ejJ                  d&ejJ                  dejJ                  fd'Z-dejJ                  d(ejJ                  d)e.fd*Z/ejP                  jS                  d+       ejP                  jS                  d,        G d- d.ejT                        Z0 G d/ d0ejT                        Z1 G d1 d2ejT                        Z2 G d3 d4ejT                        Z3d5e	e"   d6e4d7e
e   d8e.d9ede3fd:Z5 G d; d<e      Z6 G d= d>e      Z7 e        e d?e6jp                  f@      ddAdBd7e
e6   d8e.d9ede3fdC              Z9 e        e d?e7jp                  f@      ddAdBd7e
e7   d8e.d9ede3fdD              Z:y)E    N)	dataclass)partial)AnyCallableDictListOptionalSequenceTuple   )MLPStochasticDepth)VideoClassification)_log_api_usage_once   )register_modelWeightsWeightsEnum)_KINETICS400_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)MViTMViT_V1_B_Weights	mvit_v1_bMViT_V2_S_Weights	mvit_v2_sc                   l    e Zd ZU eed<   eed<   eed<   ee   ed<   ee   ed<   ee   ed<   ee   ed<   y)	MSBlockConfig	num_headsinput_channelsoutput_channelskernel_q	kernel_kvstride_q	stride_kvN)__name__
__module____qualname__int__annotations__r        Y/var/www/teggl/fontify/venv/lib/python3.12/site-packages/torchvision/models/video/mvit.pyr   r      s;    N3iCy3iCyr,   r   sreturnc                 "    d}| D ]  }||z  }	 |S N   r+   )r.   productvs      r-   _prodr5   &   s$    G 1Nr,   x
target_dim
expand_dimc                     | j                         }||dz
  k(  r| j                  |      } | |fS ||k7  rt        d| j                         | |fS )Nr2   zUnsupported input dimension )dim	unsqueeze
ValueErrorshaper6   r7   r8   
tensor_dims       r-   
_unsqueezer@   -   s^    JZ!^#KK
# j= 
z	!7yABBj=r,   r?   c                 8    ||dz
  k(  r| j                  |      } | S r1   )squeezer>   s       r-   _squeezerC   6   s!    Z!^#IIj!Hr,   r@   rC   c                        e Zd Z	 	 ddej                  deej                     deej                     deddf
 fdZdej                  d	e
eeef   de
ej                  e
eeef   f   fd
Z xZS )PoolNpoolnorm
activationnorm_before_poolr/   c                     t         |           || _        g }||j                  |       ||j                  |       |rt	        j
                  | nd | _        || _        y )N)super__init__rF   appendnn
Sequentialnorm_actrI   )selfrF   rG   rH   rI   layers	__class__s         r-   rL   zPool.__init__A   s\     		MM$!MM*%28v.d 0r,   r6   thwc                    t        |dd      \  }}t        j                  |dd      \  }}|j                  dd      }|j                  d d \  }}}|j                  ||z  |f|z         j                         }| j                  r| j                  | j                  |      }| j                  |      }|j                  dd  \  }}	}
|j                  |||d      j                  dd      }t        j                  ||fd      }| j                  s| j                  | j                  |      }t        |dd|      }|||	|
ffS )	N   r2   )r2   r   )indicesr:   r   r:   )r@   torchtensor_split	transposer=   reshape
contiguousrI   rP   rF   catrC   )rQ   r6   rT   r?   class_tokenBNCTHWs              r-   forwardzPool.forwardR   s3   "1a+: ++AtCQKK1''"1+1aIIq1uaj3&'224   T]]%>a A IIaL''!"+1aIIaAr",,Q2II{A&A.$$)Ba AQ1j)1a)|r,   )NF)r&   r'   r(   rN   Moduler	   boolrL   rZ   Tensorr   r)   rg   __classcell__rS   s   @r-   rE   rE   @   s    
 +/!&1ii1 ryy!1 RYY'	1
 1 
1" E#sC-,@ U5<<Y^_bdgil_lYmKmEn r,   rE   	embeddingdc                     | j                   d   |k(  r| S t        j                  j                  | j	                  dd      j                  d      |d      j                  d      j	                  dd      S )Nr   r2   linear)sizemode)r=   rN   
functionalinterpolatepermuter;   rB   )rm   rn   s     r-   _interpolaterv   l   sn    qQ 	!!a#--a0 	" 	

 
	Ar,   attnqq_thwk_thw	rel_pos_h	rel_pos_w	rel_pos_tc                    |\  }}}	|\  }
}}t        dt        ||      z  dz
        }t        dt        |	|      z  dz
        }t        dt        ||
      z  dz
        }t        ||z  d      }t        ||z  d      }t        j                  |      d d d f   |z  t        j                  |      d d d f   d|z
  z   |z  z
  }t        ||	z  d      }t        |	|z  d      }t        j                  |	      d d d f   |z  t        j                  |      d d d f   d|z
  z   |z  z
  }t        |
|z  d      }t        ||
z  d      }t        j                  |      d d d f   |z  t        j                  |
      d d d f   d|
z
  z   |z  z
  }t	        ||      }t	        ||      }t	        ||      }||j                            }||j                            }||j                            }|j                  \  }}}}|d d d d dd f   j                  |||||	|      } t        j                  d| |      }!t        j                  d| |      }"| j                  dddddd	      j                  |||z  |z  |	z  |      } t        j                  | |j                  dd            j                  dd      }#|#j                  ||||	||
      j                  dddddd	      }#|!d d d d d d d d d d d d d d f   |"d d d d d d d d d d d d d d f   z   |#d d d d d d d d d d d d d d f   z   j                  ||||z  |	z  |
|z  |z        }$| d d d d dd dd fxx   |$z  cc<   | S )
Nr   r2         ?zbythwc,hkc->bythwkzbythwc,wkc->bythwkr   r   rV      )r)   maxrZ   arangerv   longr=   r]   einsumru   matmulr\   view)%rw   rx   ry   rz   r{   r|   r}   q_tq_hq_wk_tk_hk_wdhdwdt	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_w	q_t_ratio	k_t_ratiodist_tRhRwRtra   n_head_r:   r_qrel_h_qrel_w_qrel_q_trel_poss%                                        r-   _add_rel_posr   {   sy    MCcMCc	QS#"	#B	QS#"	#B	QS#"	#B C#Is#IC#Is#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llFC#Is#IC#Is#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llFC#Is#IC#Is#I\\#q$w')3u||C7Hq7QUX[^U^7_cl6llF Y+IY+IY+I	6;;=	!B	6;;=	!B	6;;=	!BAvq#
Aq!"H+

ac3
<Cll/b9Gll/b9G
++aAq!Q
'
/
/QZ#5E5KS
QCll3Q 23==aCGll1fc3S9AA!Q1aQRSG 	1aAtQ,-
!Q1atQ.
/	0
!Q1aD$.
/	0 gasS#)c/:	  	Aqr12'!Kr,   shortcutresidual_with_cls_embedc           	          |r| j                  |       | S | d d d d dd d d fxx   |d d d d dd d d f   z  cc<   | S r1   )add_)r6   r   r   s      r-   _add_shortcutr      sF    	x H 	
!QA+(1aQ;//Hr,   r   r   c                       e Zd Zdej                  fdee   dedededee   dee   dee   d	ee   d
edededede	dej                  f   ddf fdZdej                  deeeef   deej                  eeeef   f   fdZ xZS )MultiscaleAttention        
input_size	embed_dim
output_dimr   r"   r#   r$   r%   residual_poolr   rel_pos_embeddropout
norm_layer.r/   Nc                 "   t         |           || _        || _        || _        ||z  | _        dt        j                  | j
                        z  | _        |	| _	        |
| _
        t        j                  |d|z        | _        t        j                  ||      g}|dkD  r&|j                  t        j                  |d             t        j                   | | _        d | _        t'        |      dkD  st'        |      dkD  rt|D cg c]  }t)        |dz         }}t+        t        j,                  | j
                  | j
                  |||| j
                  d	       || j
                              | _        d | _        d | _        t'        |      dkD  st'        |      dkD  r|D cg c]  }t)        |dz         }}t+        t        j,                  | j
                  | j
                  |||| j
                  d	       || j
                              | _        t+        t        j,                  | j
                  | j
                  |||| j
                  d	       || j
                              | _        d | _        d | _        d | _        |rt9        |dd        }t;        |      d
kD  r||d   z  n|}t;        |      d
kD  r||d   z  n|}dt9        ||      z  dz
  }d|d
   z  dz
  }t        j<                  t?        j@                  || j
                              | _        t        j<                  t?        j@                  || j
                              | _        t        j<                  t?        j@                  || j
                              | _        t        jB                  jE                  | j2                  d       t        jB                  jE                  | j4                  d       t        jB                  jE                  | j6                  d       y y c c}w c c}w )Nr   r   r   Tinplacer2   r   F)stridepaddinggroupsbiasr   {Gz?std)#rK   rL   r   r   r   head_dimmathsqrtscalerr   r   rN   LinearqkvrM   DropoutrO   projectpool_qr5   r)   rE   Conv3dpool_kpool_vr{   r|   r}   r   len	ParameterrZ   zerosinittrunc_normal_)rQ   r   r   r   r   r"   r#   r$   r%   r   r   r   r   r   rR   rx   	padding_qkv
padding_kvrq   q_sizekv_sizespatial_dimtemporal_dimrS   s                           r-   rL   zMultiscaleAttention.__init__   sL     	"$""i/DIIdmm44*'>$99YJ7#%99Z#D"ES=MM"**Wd;<}}f-+/?Q%/A"5.67Q!V7I7		MMMM#%== 4==)DK ,0+/a5#3a#71:;2#bAg,;J;		MMMM$&== 4==)DK 		MMMM$&== 4==)DK 261515z!"~&D,/MA,=TXa[(4F.1)nq.@dil*dGc&'22Q6Kz!},q0L\\%++k4==*QRDN\\%++k4==*QRDN\\%++lDMM*RSDNGG!!$..d!;GG!!$..d!;GG!!$..d!; ] 8" <s   P Pr6   rT   c           	         |j                   \  }}}| j                  |      j                  ||d| j                  | j                        j                  dd      j                  d      \  }}}| j                  | j                  ||      \  }}	n|}	| j                  | j                  ||      d   }| j                  | j                  ||      \  }}t        j                  | j                  |z  |j                  dd            }
| j                  G| j                  ;| j                  /t!        |
|||	| j                  | j                  | j                        }
|
j#                  d      }
t        j                  |
|      }| j$                  rt'        ||| j(                         |j                  dd      j                  |d| j*                        }| j-                  |      }||fS )Nr   r2   r   rY   r   rX   )r=   r   r]   r   r   r\   unbindr   r   r   rZ   r   r   r{   r|   r}   r   softmaxr   r   r   r   r   )rQ   r6   rT   ra   rb   rc   rx   kr4   rz   rw   s              r-   rg   zMultiscaleAttention.forward   s   ''1a((1+%%aAt~~t}}MWWXY[\]ddijdk1a;;"{{1c*HAuE;;"As#A&A;;"[[C(FAs||DKK!OQ[[A->?>>%$..*DIcD |||#LLq!!Q < <=KK1%%aT__=LLO#vr,   )r&   r'   r(   rN   	LayerNormr   r)   ri   floatr   rh   rL   rZ   rj   r   rg   rk   rl   s   @r-   r   r      s    /1||Z<IZ< Z< 	Z<
 Z< s)Z< 9Z< s)Z< 9Z< Z< "&Z< Z< Z< S"))^,Z< 
Z<x   E#sC-,@  U5<<Y^_bdgil_lYmKmEn  r,   r   c                        e Zd Zddej                  fdee   dededededede	d	e	d
e
dej                  f   ddf fdZdej                  deeeef   deej                  eeeef   f   fdZ xZS )MultiscaleBlockr   r   cnfr   r   r   proj_after_attnr   stochastic_depth_probr   .r/   Nc
                    t         |           || _        d | _        t	        |j
                        dkD  ro|j
                  D 
cg c]  }
|
dkD  r|
dz   n|
 }}
|D cg c]  }t        |dz         }}t        t        j                  ||j
                  |      d       | _        |r|j                  n|j                  } |	|j                        | _         |	|      | _        t        | j                  t        j                        | _        t#        ||j                  ||j$                  |j&                  |j(                  |j
                  |j*                  |||||	      | _        t/        |d|z  |j                  gt        j0                  |d       | _        t5        |d      | _        d | _        |j                  |j                  k7  r0t        j:                  |j                  |j                        | _        y y c c}
w c c}w )Nr2   r   )r   r   )	r"   r#   r$   r%   r   r   r   r   r   rV   )activation_layerr   r   row)rK   rL   r   	pool_skipr5   r$   r)   rE   rN   	MaxPool3dr!   r    norm1norm2
isinstanceBatchNorm1dneeds_transposalr   r   r"   r#   r%   rw   r   GELUmlpr   stochastic_depthr   r   )rQ   r   r   r   r   r   r   r   r   r   r.   kernel_skipr   padding_skipattn_dimrS   s                  r-   rL   zMultiscaleBlock.__init__D  s    	..2":=,,GQAE1q5q0GKG1<=ACQK=L=![|TVZDN +:3&&s?Q?Q 2 23
)
 *4::r~~ F'MM\\mm\\mm''$;!
	 \3../WW
 !00Eu M,0!4!4499S%7%79L9LMDL 5M H=s   G9G>r6   rT   c                    | j                   r1| j                  |j                  dd            j                  dd      n| j                  |      }| j                  ||      \  }}| j                  | j
                  s|n| j	                  |      }| j                  |n| j                  ||      d   }|| j                  |      z   }| j                   r1| j                  |j                  dd            j                  dd      n| j                  |      }| j                  | j
                  r|n| j	                  |      }|| j                  | j                  |            z   |fS )Nr2   r   r   )
r   r   r\   rw   r   r   r   r   r   r   )	rQ   r6   rT   x_norm1x_attnthw_newx_skipx_norm2x_projs	            r-   rg   zMultiscaleBlock.forward~  s-   CGCXCX$**Q[[A./99!Q?^b^h^hij^k))GS1%T-A-AAt||T[G\nn,$..C2H2KT**622CGCXCX$**Q[[A./99!Q?^b^h^hij^kll*d.B.BU\H]--dhhw.?@@'IIr,   )r&   r'   r(   rN   r   r   r)   r   ri   r   r   rh   rL   rZ   rj   r   rg   rk   rl   s   @r-   r   r   C  s     '*/1||8NI8N 8N 	8N
 "&8N 8N 8N 8N  %8N S"))^,8N 
8Nt
J 
JE#sC-,@ 
JU5<<Y^_bdgil_lYmKmEn 
Jr,   r   c            
       v     e Zd Zdedeeef   dededdf
 fdZdej                  dej                  fd	Z	 xZ
S )
PositionalEncoding
embed_sizespatial_sizetemporal_sizer   r/   Nc                 (   t         |           || _        || _        t	        j
                  t        j                  |            | _        d | _	        d | _
        d | _        |st	        j
                  t        j                  | j                  d   | j                  d   z  |            | _	        t	        j
                  t        j                  | j                  |            | _
        t	        j
                  t        j                  |            | _        y y )Nr   r2   )rK   rL   r   r   rN   r   rZ   r   r`   spatial_postemporal_pos	class_pos)rQ   r   r   r   r   rS   s        r-   rL   zPositionalEncoding.__init__  s    (*<<J(?@374815!||EKK8I8I!8LtO`O`abOc8ceo,pqD "U[[9K9KZ-X YD\\%++j*ABDN r,   r6   c                    | j                   j                  |j                  d      d      j                  d      }t	        j
                  ||fd      }| j                  | j                  | j                  | j                  j                  \  }}t	        j                  | j                  |d      }|j                  | j                  j                  d      j                  | j                  dd      j                  d|             t	        j
                  | j                  j                  d      |fd      j                  d      }|j                  |       |S )Nr   rX   r2   rY   )r`   expandrq   r;   rZ   r_   r   r   r   r=   repeat_interleaver   r   r]   )rQ   r6   r`   hw_sizer   pos_embeddings         r-   rg   zPositionalEncoding.forward  s   &&--affQi<FFqIII{A&A.'D,=,=,IdnnNh"&"2"2"8"8GZ!33D4E4EwTUVMt//99!<CCDDVDVXZ\^_gghjlvwx!IIt~~'?'?'BM&RXYZddefgMFF=!r,   )r&   r'   r(   r)   r   ri   rL   rZ   rj   rg   rk   rl   s   @r-   r   r     sW    C3 CeCHo CVY Cjn Csw C %,, r,   r   c            $       ,    e Zd Z	 	 	 	 	 	 	 	 	 ddeeef   dedee   dedededed	ed
ededede	e
dej                  f      de	e
dej                  f      deeeef   deeeef   deeeef   ddf" fdZdej                  dej                  fdZ xZS )r   Nr   r   block_settingr   r   r   r   r   attention_dropoutr   num_classesblock.r   patch_embed_kernelpatch_embed_stridepatch_embed_paddingr/   c                    t         |           t        |        t        |      }|dk(  rt	        d      |t
        }|t        t        j                  d      }t        j                  d|d   j                  |||      | _        t        |f|z   | j                  j                        D cg c]
  \  }}||z   }}}t        |d   j                  |d   |d	   f|d   |
      | _        t        j                          | _        t%        |      D ]~  \  }}|
|z  |dz
  z  }| j"                  j'                   ||||||||	||	             t        |j(                        dkD  sTt        ||j(                        D cg c]
  \  }}||z   }}}  ||d   j*                        | _        t        j.                  t        j0                  |d      t        j2                  |d   j*                  |            | _        | j7                         D ]l  }t9        |t        j2                        r~t        j:                  j=                  |j>                  d       t9        |t        j2                        sd|j@                  qt        j:                  jC                  |j@                  d       t9        |t        j                        ro|j>                  *t        j:                  jC                  |j>                  d       |j@                  t        j:                  jC                  |j@                  d       %t9        |t              s7|jE                         D ]#  }t        j:                  j=                  |d       % o yc c}}w c c}}w )a  
        MViT main class.

        Args:
            spatial_size (tuple of ints): The spacial size of the input as ``(H, W)``.
            temporal_size (int): The temporal size ``T`` of the input.
            block_setting (sequence of MSBlockConfig): The Network structure.
            residual_pool (bool): If True, use MViTv2 pooling residual connection.
            residual_with_cls_embed (bool): If True, the addition on the residual connection will include
                the class embedding.
            rel_pos_embed (bool): If True, use MViTv2's relative positional embeddings.
            proj_after_attn (bool): If True, apply the projection after the attention.
            dropout (float): Dropout rate. Default: 0.0.
            attention_dropout (float): Attention dropout rate. Default: 0.0.
            stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
            num_classes (int): The number of classes.
            block (callable, optional): Module specifying the layer which consists of the attention and mlp.
            norm_layer (callable, optional): Module specifying the normalization layer to use.
            patch_embed_kernel (tuple of ints): The kernel of the convolution that patchifies the input.
            patch_embed_stride (tuple of ints): The stride of the convolution that patchifies the input.
            patch_embed_padding (tuple of ints): The padding of the convolution that patchifies the input.
        r   z+The configuration parameter can't be empty.Ngư>)epsr   )in_channelsout_channelskernel_sizer   r   r2   r   )r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   rX   Tr   r   r   r   )#rK   rL   r   r   r<   r   r   rN   r   r   r    	conv_projzipr   r   pos_encoding
ModuleListblocks	enumeraterM   r$   r!   rG   rO   r   r   headmodulesr   r   r   weightr   	constant_
parameters)rQ   r   r   r  r   r   r   r   r   r  r   r  r	  r   r
  r  r  total_stage_blocksrq   r   r   stage_block_idr   sd_probmweightsrS   s                             r-   rL   zMViT.__init__  s   R 	
 	D! /"JKK=#E 48J &q)88*%'
 :=m=MP\=\^b^l^l^s^s9tuvdfnu
u /$Q'66$Q-A7$Q-'	
 mmo#,]#; 	`NC+n<@RUX@XYGKK)"/,C"/$3-*1)
 3<< 1$ADZQTQ]Q]A^_vdfn_
_'	`( }R0@@A	 MMJJw-IImB'77E
	
  	=A!RYY'%%ahhD%9a+0BGG%%affc2Ar||,88'GG%%ahh466%GG%%affc2A12 ||~ =GGG))'t)<=	=Q v> `s   .M==Nr6   c                    t        |dd      d   }| j                  |      }|j                  d      j                  dd      }| j	                  |      }| j                  j
                  f| j                  j                  z   }| j                  D ]  } |||      \  }} | j                  |      }|d d df   }| j                  |      }|S )Nr   r   r   r2   )
r@   r  flattenr\   r  r   r   r  rG   r  )rQ   r6   rT   r	  s       r-   rg   zMViT.forward!  s    q!Q"NN1IIaL""1a( a    ..043D3D3Q3QQ[[ 	#E1c]FAs	#IIaL adGIIaLr,   )	g      ?r   r   i  NN)r      r$  )r   rV   rV   )r2   r   r   )r&   r'   r(   r   r)   r
   r   ri   r   r	   r   rN   rh   rL   rZ   rj   rg   rk   rl   s   @r-   r   r     sM    #&'*489=3<3<4=#v=CHov= v=  .	v=
 v= "&v= v= v= v= !v=  %v= v= bii01v= Xc299n56v= "#sC-0v=  "#sC-0!v=" #3S=1#v=$ 
%v=p %,, r,   r   r  r   r!  progresskwargsc                 >   |~t        |dt        |j                  d                |j                  d   d   |j                  d   d   k(  sJ t        |d|j                  d          t        |d|j                  d          |j                  dd	      }|j                  dd
      }t	        d||| |j                  dd      |j                  dd      |j                  dd      |j                  dd      |d|}|"|j                  |j                  |d             |S )Nr  
categoriesmin_sizer   r2   r   r   min_temporal_size   r,     r   Fr   Tr   r   )r   r   r  r   r   r   r   r   )r%  
check_hashr+   )r   r   metapopr   load_state_dictget_state_dict)r  r   r!  r%  r&  r   r   models           r-   _mvitr4  8  s$    fmSl9S5TU||J'*gll:.Fq.IIIIfngll:6NOfow||DW7XY::nj9LJJ3M 
!##jj%8 &

+Dd Kjj%8

#4e<3
 
E g44hSW4XYLr,   c                   Z    e Zd Z ed eedddd      ddedd	d
ddddiddd	      ZeZy)r   z:https://download.pytorch.org/models/mvit_v1_b-dbeb1030.pthr+     ?r9  r9  ?r;  r;  	crop_sizeresize_sizemeanr   r-  zShttps://github.com/facebookresearch/pytorchvideo/blob/main/docs/source/model_zoo.mdThe weights were ported from the paper. The accuracies are estimated on video-level with parameters `frame_rate=7.5`, `clips_per_video=5`, and `clip_len=16`ip.Kinetics-400gJ+S@gh|?eW@zacc@1zacc@5guVQ@g rxa@	r)  r*  r(  recipe_docs
num_params_metrics_ops
_file_sizeurl
transformsr/  N	r&   r'   r(   r   r   r   r   KINETICS400_V1DEFAULTr+   r,   r-   r   r   Y  sf    H #%
 #!#1k[ ###! !#
N: Gr,   r   c                   Z    e Zd Z ed eedddd      ddedd	d
ddddiddd	      ZeZy)r   z:https://download.pytorch.org/models/mvit_v2_s-ae3be167.pthr+  r6  r8  r:  r<  r-  zChttps://github.com/facebookresearch/SlowFast/blob/main/MODEL_ZOO.mdr@  irA  g r0T@g(\W@rB  guVP@g?5^I|`@rC  rJ  NrM  r+   r,   r-   r   r   z  sf    H #%
 #!#1[[ ###! !#
N: Gr,   r   
pretrained)r!  T)r!  r%  c                 ~   t         j                  |       } g dg dg dg g dg g dg g g g g g g g g g g dg gg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg g dg g dg g g g g g g g g g g dg gg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgd	}g }t        t        |d
               D ]M  }|j	                  t        |d
   |   |d   |   |d   |   |d   |   |d   |   |d   |   |d   |   	             O t        ddd|dd|j                  dd      | |d|S )a  
    Constructs a base MViTV1 architecture from
    `Multiscale Vision Transformers <https://arxiv.org/abs/2104.11227>`__.

    .. betastatus:: video module

    Args:
        weights (:class:`~torchvision.models.video.MViT_V1_B_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MViT_V1_B_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.MViT``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/mvit.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MViT_V1_B_Weights
        :members:
    r2   r   r   rV   rV   rV   rV   rV   rV   rV   rV   rV   rV   rV      rT  `      rW    rX  rX  rX  rX  rX  rX  rX  rX  rX  rX     rY  )rW  rW  rX  rX  rX  rX  rX  rX  rX  rX  rX  rX  rX  rY  rY  rY  r   r   r   r2   r   r   r2   rT  rT  r2   rV   rV   r2   r2   r2   r   r    r!   r"   r#   r$   r%   r   r    r!   r"   r#   r$   r%   r+  r-  Fr   皙?)r   r   r  r   r   r   r!  r%  r+   )r   verifyranger   rM   r   r4  r0  r!  r%  r&  configr  is         r-   r   r     s   2  &&w/G FikB	2r2r2r2rSUWY[dfhi!
$ B	2r2r2r2rSUWY[dfhi!
1*FX M3vk*+, 
 -a0%&67: &'8 9! <
+A. -a0
+A. -a0
	

  
# %$jj)@#F
 
 
r,   c                    t         j                  |       } g dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dg dgd	}g }t        t        |d
               D ]M  }|j	                  t        |d
   |   |d   |   |d   |   |d   |   |d   |   |d   |   |d   |   	             O t        ddd|dddd|j                  dd      | |d
|S )aC  Constructs a small MViTV2 architecture from
    `Multiscale Vision Transformers <https://arxiv.org/abs/2104.11227>`__ and
    `MViTv2: Improved Multiscale Vision Transformers for Classification
    and Detection <https://arxiv.org/abs/2112.01526>`__.

    .. betastatus:: video module

    Args:
        weights (:class:`~torchvision.models.video.MViT_V2_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MViT_V2_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.MViT``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/mvit.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MViT_V2_S_Weights
            :members:
    rS  )rV  rV  rW  rW  rX  rX  rX  rX  rX  rX  rX  rX  rX  rX  rX  rY  rU  rZ  r^  r[  r\  r]  r_  r   r    r!   r"   r#   r$   r%   r+  r-  TFr   r`  )
r   r   r  r   r   r   r   r   r!  r%  r+   )r   ra  rb  r   rM   r   r4  r0  rc  s         r-   r   r     s   4  &&w/G Fhj!
& !
& !
& !
uLF\ M3vk*+, 
 -a0%&67: &'8 9! <
+A. -a0
+A. -a0
	

  # %$jj)@#F  r,   );r   dataclassesr   	functoolsr   typingr   r   r   r   r	   r
   r   rZ   torch.fxtorch.nnrN   opsr   r   transforms._presetsr   utilsr   _apir   r   r   _metar   _utilsr   r   __all__r   r)   r5   rj   r@   rC   fxwraprh   rE   rv   r   ri   r   r   r   r   r   r   r4  r   r   rN  r   r   r+   r,   r-   <module>ru     s[    !  G G G    ' 6 ( 7 7 + C   Xc] s %,, C S U5<<Y\K\E]  # 3 C TYT`T`  l  j )299 )XELL S U\\ 9
,,9||9 c39 c3	9
 ||9 ||9 ||9 \\9xU\\ U\\ TX  n  o }")) }@EJbii EJP :M299 M`&  k" 	
  
B B B ,0A0P0P!QR8<t ](#45 ] ]_b ]gk ] S ]@ ,0A0P0P!QR8<t B(#45 B B_b Bgk B S Br,   