
    kh                     h   d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	m
Z
mZmZmZ ddlZddlmZ ddlmZmZ dd	lmZmZmZmZmZ d
dlmZ d
dlmZ d
dlmZ d
dlm Z  d
dl!m"Z"m#Z# ddgZ$e G d d             Z%d Z& G d dejN                        Z(e	 dVdee)   de*deejV                  eejV                     f   fd       Z,e	 dWde)deejV                     deejV                  ee)   f   fd       Z-edejV                  dejV                  de*d ee)   d!ee)   d"ejV                  d#ejV                  fd$       Z. G d% d&ejN                        Z/ G d' d(ejN                        Z0 G d) d*ejN                        Z1 G d+ d,ejN                        Z2 G d- dejN                        Z3d. Z4 e5 e%d/0       e%d10       e%d20       e%d3d4d5d67       e%d1d8       e%d2d8       e%d3d4d5dd9       e%d:d;d<dd9      =      Z6dXd>Z7dYd@Z8 e# e8dAdBC       e8dDdBC       e8dEdBC       e8dFdBC       e8d?G       e8dHdBdIJ       e8dKdBdIJ       e8dLdBdIJ      dM      Z9e"dZde3fdN       Z:e"dZde3fdO       Z;e"dZde3fdP       Z<e"dZde3fdQ       Z=e"dZde3fdR       Z>e"dZde3fdS       Z?e"dZde3fdT       Z@e"dZde3fdU       ZAy)[a   Multi-Scale Vision Transformer v2

@inproceedings{li2021improved,
  title={MViTv2: Improved multiscale vision transformers for classification and detection},
  author={Li, Yanghao and Wu, Chao-Yuan and Fan, Haoqi and Mangalam, Karttikeya and Xiong, Bo and Malik, Jitendra and Feichtenhofer, Christoph},
  booktitle={CVPR},
  year={2022}
}

Code adapted from original Apache 2.0 licensed impl at https://github.com/facebookresearch/mvit
Original copyright below.

Modifications and timm support by / Copyright 2022, Ross Wightman
    N)OrderedDict)	dataclass)partialreduce)UnionListTupleOptional)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpDropPathtrunc_normal_tf_get_norm_layer	to_2tuple   )build_model_with_cfg)feature_take_indices)register_notrace_function)
checkpoint)register_modelgenerate_default_cfgsMultiScaleVitMultiScaleVitCfgc                      e Zd ZU dZeedf   ed<   dZeeeedf   f   ed<   dZ	eeeedf   f   ed<   dZ
eed	<   d
Zeed<   dZeed<   dZeed<   d
Zeed<   d
Zeed<   dZeed<   dZeed<   dZeeef   ed<   dZeeeeef         ed<   dZeeeeef         ed<   dZeeeef      ed<   dZeeef   ed<   dZeeef   ed<   dZeeef   ed<   d Zeed!<   d"Zeed#<   d$Zeeeeef   f   ed%<   d&Z eeeeef   f   ed'<   d(Z!eed)<   d* Z"y)+r            r    .depths`   	embed_dimr   	num_heads      @	mlp_ratioF
pool_firstTexpand_attnqkv_biasuse_cls_tokenuse_abs_posresidual_poolingconvmoder    r    
kernel_qkv)r   r   r   r   r3   r3   stride_qN	stride_kv   r7   stride_kv_adaptive   r:   patch_kernelpatch_stridepatch_paddingmax	pool_typespatialrel_pos_typegelu	act_layer	layernorm
norm_layergư>norm_epsc           	      &    t         j                        }t         j                  t        t
        f      s"t	         fdt        |      D               _        t         j                        |k(  sJ t         j                  t        t
        f      s"t	         fdt        |      D               _        t         j                        |k(  sJ  j                   j                   j                  }g }t        |      D ]x  }t         j                  |         dkD  rAt        t        |            D cg c]$  }t        ||    j                  |   |   z  d      & }}|j                  t	        |             z t	        |       _	        y y y c c}w )Nc              3   B   K   | ]  }j                   d |z  z    ywr   N)r$   .0iselfs     N/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/mvitv2.py	<genexpr>z1MultiScaleVitCfg.__post_init__.<locals>.<genexpr>A        "Vq4>>AF#:"V   c              3   B   K   | ]  }j                   d |z  z    ywrI   )r%   rJ   s     rN   rO   z1MultiScaleVitCfg.__post_init__.<locals>.<genexpr>E   rP   rQ   r   )lenr"   
isinstancer$   tuplelistranger%   r8   r5   minr4   r>   append)rM   
num_stages
_stride_kvpool_kv_striderL   ds   `     rN   __post_init__zMultiScaleVitCfg.__post_init__>   sW   %
$..5$-8""VE*DU"VVDN4>>"j000$..5$-8""VE*DU"VVDN4>>"j000"".4>>3I00JN:& 9t}}Q'(1, "'s:!7" JqMT]]1-=a-@@!D"J " %%eJ&789 #>2DN 4J.
"s   4)F)#__name__
__module____qualname__r"   r	   int__annotations__r$   r   r%   r'   floatr(   boolr)   r*   r+   r,   r-   r/   strr1   r4   r
   r5   r8   r;   r<   r=   r?   rA   rC   rE   rF   r^        rN   r   r   $   s   +FE#s(O+-/IuS%S/)*/-.IuS%S/)*.IuJKHdM4K!d!D#"(Jc3h(1QHhuU38_-.Q26IxeCHo./64:sCx1:$*L%S/*$*L%S/*%+M5c?+Is!L#!-3IuS%S/)*3.9Jc5c?*+9He3rh   c                 8    t        t        j                  | d      S )Nr   )r   operatormul)iterables    rN   prodrm   U   s    (,,!,,rh   c                   ^     e Zd ZdZ	 	 	 	 	 d fd	Zdeej                  ee	   f   fdZ
 xZS )
PatchEmbedz
    PatchEmbed.
    c                 `    t         |           t        j                  |||||      | _        y )N)kernel_sizestridepadding)super__init__r   Conv2dproj)rM   dim_indim_outkernelrr   rs   	__class__s         rN   ru   zPatchEmbed.__init__^   s/     	II
	rh   returnc                     | j                  |      }|j                  d      j                  dd      |j                  dd  fS )Nr   r   )rw   flatten	transposeshaperM   xs     rN   forwardzPatchEmbed.forwardp   s9    IIaLyy|%%a+QWWRS\99rh   )r    i   r9   r6   r0   )r_   r`   ra   __doc__ru   r	   torchTensorr   rb   r   __classcell__r{   s   @rN   ro   ro   Y   s<     
$:E%,,S	"9: :rh   ro   T	feat_sizehas_cls_tokenr|   c                     |\  }}|r#| d d d d d dd d f   | d d d d dd d d f   } }nd }| j                  d||| j                  d         j                  dddd      j                         } | |fS )Nr   r   r    r   )reshaper   permute
contiguous)r   r   r   HWcls_toks         rN   reshape_pre_poolr   v   s|     DAqq!RaR{^Qq!QR{^			"aAGGBK(00Aq!<GGIAg:rh   r%   r   c                    | j                   d   | j                   d   g}| j                   d   | j                   d   z  }| j                  d|| j                   d   |      j                  dd      } |t        j                  || fd      } | |fS )Nr   r    r   r   dim)r   r   r   r   cat)r   r%   r   r   L_pooleds        rN   reshape_post_poolr      s     QWWQZ(IwwqzAGGAJ&H			"iX6@@AFAIIwl*i<rh   attnqq_sizek_size	rel_pos_h	rel_pos_wc                 R   |rdnd}|\  }}	|\  }
}t        |
|z  d      }t        ||
z  d      }t        j                  ||j                        j	                  d      |z  t        j                  |
|j                        j	                  d      |z  z
  }||
dz
  |z  z  }t        ||	z  d      }t        |	|z  d      }t        j                  |	|j                        j	                  d      |z  t        j                  ||j                        j	                  d      |z  z
  }||dz
  |z  z  }||j                            }||j                            }|j                  \  }}}}|dddd|df   j                  ||||	|      }t        j                  d||      }t        j                  d||      }| dddd|d|df   j                  |d||	|
|      |j	                  d      z   |j	                  d	      z   j                  |d||	z  |
|z        | dddd|d|df<   | S )
z1
    Spatial Relative Positional Embeddings.
    r   r   g      ?)devicer   Nzbyhwc,hkc->byhwkzbyhwc,wkc->byhwkr~   )
r>   r   aranger   	unsqueezelongr   r   einsumview)r   r   r   r   r   r   r   sp_idxq_hq_wk_hk_w	q_h_ratio	k_h_ratiodist_h	q_w_ratio	k_w_ratiodist_wrel_hrel_wBn_headq_Nr   r_qs                            rN   cal_rel_pos_typer      s,     QQFHCHC C#Is#IC#Is#ILLQXX.88<yHLLQXX.88;iGH  sQw)##FC#Is#IC#Is#ILLQXX.88<yHLLQXX.88;iGH  sQw)##Ffkkm$Efkkm$E''AvsC
Aq&'M

"
"1fc3
<CLL+S%8ELL+S%8E 	Q67FG#$))!Rc3D
//"
	
//"
	 d1b#)S3Y'	 	Avw	  Krh   c                   \     e Zd Zddddddddddej                  f fd	Zdee   fdZ xZ	S )	MultiScaleAttentionPoolFirst   Tr.   r2   r@   c           	      <   t         |           || _        || _        ||z  | _        | j                  dz  | _        || _        t        |D cg c]  }t        |dz         c}      }t        |D cg c]  }t        |dz         c}      }t        j                  |||      | _        t        j                  |||      | _        t        j                  |||      | _        t        j                  ||      | _        t        |      dk(  rt        |	      dk(  rd }t        |      dk(  rt        |
      dk(  rd }|| _        |dk(  | _        d\  | _        | _        | _        d\  | _        | _        | _        |dv rW|dk(  rt        j0                  nt        j2                  }|r |||	|      | _        |r |||
|      | _         |||
|      | _        n|d	k(  s|dk(  r|d	k(  r||z  n|}|r.t        j4                  ||||	||d
      | _         ||      | _        |rkt        j4                  ||||
||d
      | _         ||      | _        t        j4                  ||||
||d
      | _         ||      | _        nt7        d|       || _        | j8                  dk(  r|d   |d   k(  sJ |d   }t;        |	      dkD  r||	d   z  n|}t;        |
      dkD  r||
d   z  n|}dt=        ||      z  dz
  }t        j>                  tA        jB                  || j                              | _"        t        j>                  tA        jB                  || j                              | _#        tI        | jD                  d       tI        | jF                  d       || _%        y c c}w c c}w )N      r   biasr   conv_unsharedNNNavgr>   r>   r.   Frr   rs   groupsr   Unsupported model r@   r   {Gz?std)&rt   ru   r%   ry   head_dimscaler   rU   rb   r   Linearr   kvrw   rm   r/   unsharedpool_qpool_kpool_vnorm_qnorm_knorm_v	MaxPool2d	AvgPool2drv   NotImplementedErrorrA   rS   r>   	Parameterr   zerosr   r   r   r-   rM   r   ry   r   r%   r*   r/   kernel_q	kernel_kvr4   r5   r   rA   r-   rE   r   	padding_qkv
padding_kvpool_opdim_convsizer   kv_size
rel_sp_dimr{   s                            rN   ru   z%MultiScaleAttentionPoolFirst.__init__   sN   " 	"9,]]d*
*913qAv;9:	9=RCaL=>
3h73h73h7IIgw/	 >Q4>Q#6H	?aDOq$8I	/0@-T[$+0@-T[$+>!&*embllG%h)D%iJG%iJGV^t6+/6>si'sH ii#%# )2 ii$&# )2 ii$&# )2%(:4&&ABB )	)Q<9Q<///Q<D,/MA,=TXa[(4F.1)nq.@dil*dGS11A5J\\%++j$--*PQDN\\%++j$--*PQDNT^^6T^^6 0W :=s   N1Nr   c           	         |j                   \  }}}| j                  rdn| j                  }|j                  |||d      j	                  dddd      }|x}x}}	| j
                  Ft        ||| j                        \  }}
| j                  |      }t        || j                  |
      \  }}n|}| j                  | j                  |      }| j                  Ft        ||| j                        \  }}| j                  |      }t        || j                  |      \  }}n|}| j                  | j                  |      }| j                  Ft        |	|| j                        \  }	}| j                  |	      }	t        |	| j                  |      \  }	}n|}| j                  | j                  |	      }	|d   |d   z  t        | j                        z   }|j                  dd      j                  ||d      }| j!                  |      j                  ||| j                  d      j                  dd      }|d   |d   z  t        | j                        z   }|j                  dd      j                  ||d      }| j#                  |      j                  ||| j                  d      }|d   |d   z  t        | j                        z   }|	j                  dd      j                  ||d      }	| j%                  |	      j                  ||| j                  d      j                  dd      }	|| j&                  z  |z  }| j(                  dk(  r/t+        ||| j                  ||| j,                  | j.                        }|j1                  d      }||	z  }| j2                  r||z   }|j                  dd      j                  |d| j4                        }| j7                  |      }||fS )Nr   r   r   r   r    r@   r   )r   r   r%   r   r   r   r   r   r   r   r   r   r   r   rb   r   r   r   r   r   rA   r   r   r   softmaxr-   ry   rw   )rM   r   r   r   N_fold_dimr   r   r   q_tokr   k_tokr   v_tokv_sizer   k_Nv_Nr   s                       rN   r   z$MultiScaleAttentionPoolFirst.forward+  sn   ''1a14>>IIaHb)11!Q1=A;;"'9d6H6HIHAuAA)!T^^UCIAvF;;"AA;;"'9d6H6HIHAuAA)!T^^UCIAvF;;"AA;;"'9d6H6HIHAuAA)!T^^UCIAvF;;"AAQi&)#c$*<*<&==KK1%%ab1FF1Iadnnb9CCAqIQi&)#c$*<*<&==KK1%%ab1FF1Iadnnb9Qi&)#c$*<*<&==KK1%%ab1FF1Iadnnb9CCAqIDJJ!#	)#""D |||#1H  AAKK1%%aT\\:IIaL&yrh   
r_   r`   ra   r   	LayerNormru   r   rb   r   r   r   s   @rN   r   r      sC     "!||b1HBDI Brh   r   c                   \     e Zd Zddddddddddej                  f fd	Zdee   fdZ xZ	S )	MultiScaleAttentionr   Tr.   r2   r@   c           	         t         |           || _        || _        ||z  | _        | j                  dz  | _        || _        t        |D cg c]  }t        |dz         c}      }t        |D cg c]  }t        |dz         c}      }t        j                  ||dz  |      | _        t        j                  ||      | _        t        |      dk(  rt        |	      dk(  rd }t        |      dk(  rt        |
      dk(  rd }|| _        |dk(  | _        d\  | _        | _        | _        d\  | _        | _        | _        |dv rW|d	k(  rt        j,                  nt        j.                  }|r |||	|      | _        |r |||
|      | _         |||
|      | _        n|d
k(  s|dk(  r|d
k(  r||z  n|}|r.t        j0                  ||||	||d      | _         ||      | _        |rkt        j0                  ||||
||d      | _         ||      | _        t        j0                  ||||
||d      | _         ||      | _        nt3        d|       || _        | j4                  dk(  r|d   |d   k(  sJ |d   }t7        |	      dkD  r||	d   z  n|}t7        |
      dkD  r||
d   z  n|}dt9        ||      z  dz
  }t        j:                  t=        j>                  || j                              | _         t        j:                  t=        j>                  || j                              | _!        tE        | j@                  d       tE        | jB                  d       || _#        y c c}w c c}w )Nr   r   r    r   r   r   r   r   r>   r.   Fr   r   r@   r   r   r   )$rt   ru   r%   ry   r   r   r   rU   rb   r   r   qkvrw   rm   r/   r   r   r   r   r   r   r   r   r   rv   r   rA   rS   r>   r   r   r   r   r   r   r-   r   s                            rN   ru   zMultiScaleAttention.__init__q  s*   " 	"9,]]d*
*913qAv;9:	9=RCaL=>
99S'A+H=IIgw/	 >Q4>Q#6H	?aDOq$8I	/0@-T[$+0@-T[$+>!&*embllG%h)D%iJG%iJGV^t6/3v~w)+7H ii#%# )2 ii$&# )2 ii$&# )2%(:4&&ABB )	)Q<9Q<///Q<D,/MA,=TXa[(4F.1)nq.@dil*dGS11A5J\\%++j$--*PQDN\\%++j$--*PQDNT^^6T^^6 0S :=s   M1M"r   c           	         |j                   \  }}}| j                  |      j                  ||d| j                  d      j	                  ddddd      }|j                  d      \  }}}	| j                  Ft        ||| j                        \  }}
| j                  |      }t        || j                  |
      \  }}n|}| j                  | j                  |      }| j                  Ft        ||| j                        \  }}| j                  |      }t        || j                  |      \  }}n|}| j                  | j                  |      }| j                  Et        |	|| j                        \  }	}| j                  |	      }	t        |	| j                  |      \  }	}| j                  | j                  |	      }	|| j                  z  |j!                  dd      z  }| j"                  d	k(  r/t%        ||| j                  ||| j&                  | j(                        }|j+                  d      }||	z  }| j,                  r||z   }|j!                  dd      j                  |d| j.                        }| j1                  |      }||fS )
Nr    r   r   r   r   r7   r   r~   r@   )r   r   r   r%   r   unbindr   r   r   r   r   r   r   r   r   r   r   rA   r   r   r   r   r-   ry   rw   )rM   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s                   rN   r   zMultiScaleAttention.forward  s=   ''1ahhqk!!!Q4>>2>FFq!QPQSTU***#1a;;"'9d6H6HIHAuAA)!T^^UCIAvF;;"AA;;"'9d6H6HIHAuAA)!T^^UCIAvF;;"AA;;"'9d6H6HIHAuAA$Q>DAq;;"AADJJ!++b""55	)#""D |||#1H  AAKK1%%aT\\:IIaL&yrh   r   r   s   @rN   r   r   p  sA     "!||`1D3DI 3rh   r   c                   t     e Zd Zdddej                  ddddddddddf fd	Zd	ee   fd
Zd	ee   fdZ	 xZ
S )MultiScaleBlockr&   T        r2   r.   Fr@   c                 F   t         |           ||k7  }|| _        || _        || _         ||      | _        |r|rt        j                  ||      nd | _        |rat        |      dkD  rS|D cg c]  }|dkD  r|dz   n| }}|}|D cg c]  }t        |dz         }}t        j                  |||      | _        nd | _        |r|n|}|rt        nt        } |||||||	|
|||||||      | _        |dkD  rt!        |      nt        j"                         | _         ||      | _        |}|r|st        j                  ||      nd | _        t+        |t        ||z        |      | _        |dkD  rt!        |      | _        y t        j"                         | _        y c c}w c c}w )Nr   r   )r%   r   r*   r   r   r4   r5   rE   r   r/   rA   r-   r   )in_featureshidden_featuresout_features)rt   ru   r   ry   r   norm1r   r   shortcut_proj_attnrm   rb   r   shortcut_pool_attnr   r   r   r   Identity
drop_path1norm2shortcut_proj_mlpr   mlp
drop_path2)rM   r   ry   r%   r   r'   r*   	drop_pathrE   r   r   r4   r5   r/   r   r)   r(   rA   r-   proj_neededskernel_skipstride_skipskippadding_skipatt_dim
attn_layermlp_dim_outr{   s                               rN   ru   zMultiScaleBlock.__init__
  s   * 	Wn*_
=H["))C"9^bX*:BCQAE1q5q0CKC"K7BCtC	NCLC&(ll;\&ZD#&*D#('c5?1EX
!'%-
	  2;S(9-bkkm(
<GP[3!8ae) 34$

 2;S(9-bkkmG DCs   ,FFr   c                    | j                   |S | j                  r|d d d dd d f   |d d dd d d f   }}nd }|j                  \  }}}|\  }}|j                  ||||      j	                  dddd      j                         }| j                  |      }|j                  ||d      j                  dd      }|t        j                  ||fd      }|S )Nr   r   r    r   r   r   )	r  r   r   r   r   r   r   r   r   )	rM   r   r   r   r   LCr   r   s	            rN   _shortcut_poolzMultiScaleBlock._shortcut_poolN  s    ""*H1bqb!8a12qkQGG''1a1IIaAq!))!Q15@@B##A&IIaB))!Q/		7A,A.Arh   c                    | j                  |      }| j                  |n| j                  |      }| j                  ||      }| j                  ||      \  }}|| j	                  |      z   }| j                  |      }| j                  |n| j                  |      }|| j                  | j                  |            z   }||fS N)	r  r  r  r   r  r  r  r	  r  )rM   r   r   x_norm
x_shortcutfeat_size_news         rN   r   zMultiScaleBlock.forward^  s    A119Qt?V?VW]?^
((Y?
99VY7=++A008Qd>T>TU[>\
&)9::-rh   )r_   r`   ra   r   r   ru   r   rb   r  r   r   r   s   @rN   r   r   	  s[     ||"!'BTH49   DI  rh   r   c                   b     e Zd Zddddddddddddej                  df fd	Zd	ee   fd
Z xZ	S )MultiScaleVitStager&   Tr.   r2   Fr@   r   c                 2   t         |           d| _        t        j                         | _        |r|f|z  }n|f|dz
  z  |fz   }t        |      D ]  }t        di d|d||   d|d|d|d|d	|	d
|
d|dk(  r|ndd|d|d|d|d|d|d|d|dt        |t        t        f      r||   n|}||   }| j
                  j                  |       |dk(  st        t        ||      D cg c]
  \  }}||z   c}}      } || _        y c c}}w )NFr   r   ry   r%   r   r'   r*   r   r   r4   r   r2   r5   r/   r   r(   rA   r-   r)   rE   r
  rg   )rt   ru   grad_checkpointingr   
ModuleListblocksrW   r   rT   rV   rU   rY   zipr   )rM   r   ry   depthr%   r   r'   r*   r/   r   r   r4   r5   r   r)   r(   rA   r-   rE   r
  out_dimsrL   attention_blockr   rr   r{   s                            rN   ru   zMultiScaleVitStage.__init__n  s   , 	"'mmozE)Hv+wj8Hu 	aA-   $ $	
 $ " " $ &'!V $  , & * "2  (!" &#$ +5Yu*N)A,T]%O( 1+CKK/Av!c)U]F^"_ldF46>"_`	1	a4 # #`s   2Dr   c                     | j                   D ]I  }| j                  r/t        j                  j	                         st        |||      \  }}> |||      \  }}K ||fS r  )r"  r   r   jitis_scriptingr   )rM   r   r   blks       rN   r   zMultiScaleVitStage.forward  s[    ;; 	1C&&uyy/E/E/G)#q)<9"1i09		1
 )|rh   r   r   s   @rN   r  r  l  sI     "!||)9#vDI rh   r  c                   v    e Zd ZdZ	 	 	 	 	 	 ddedeeef   dedee   dede	de	f fd	Z
d
 Zej                  j                  d        Zej                  j                  d d       Zej                  j                  d!d       Zej                  j                  dej&                  fd       Zd"dedee   fdZ	 	 	 	 	 d#dej,                  deeeee   f      dededededeeej,                     eej,                  eej,                     f   f   fdZ	 	 	 d$deeee   f   dedefdZd Zd defdZd Z xZS )%r   a  
    Improved Multiscale Vision Transformers for Classification and Detection
    Yanghao Li*, Chao-Yuan Wu*, Haoqi Fan, Karttikeya Mangalam, Bo Xiong, Jitendra Malik,
        Christoph Feichtenhofer*
    https://arxiv.org/abs/2112.01526

    Multiscale Vision Transformers
    Haoqi Fan*, Bo Xiong*, Karttikeya Mangalam*, Yanghao Li*, Zhicheng Yan, Jitendra Malik,
        Christoph Feichtenhofer*
    https://arxiv.org/abs/2104.11227
    cfgimg_sizein_chansglobal_poolnum_classesdrop_path_rate	drop_ratec           
      	   t         |           t        |      }t        t	        |j
                        |j                        }|| _        || _        ||j                  rdnd}|| _
        t        |j                        | _        |j                  | _        |j                  d   }	t        ||	|j                   |j"                  |j$                        | _        |d   |j"                  d   z  |d   |j"                  d   z  f}
t)        |
      }|j                  r<t+        j,                  t/        j0                  dd|	            | _        d| _        |dz   }nd| _        d | _        |}|j6                  r0t+        j,                  t/        j0                  d||	            | _        nd | _        t;        |j                        }|
}t=        |j"                        }t/        j>                  d|tA        |j                              jC                  |j                        D cg c]  }|jE                          }}t+        jF                         | _$        g | _%        tM        |      D ]  }|j                  r|j                  |   }n|j                  tO        |dz   |dz
           }tQ        d i d|	d|d	|j                  |   d
|jR                  |   d|d|jT                  d|jV                  d|jX                  d|jZ                  d|j                  d|j\                  d|j\                  d|j^                  |   d|j`                  |   d|j                  d|jb                  d|jd                  d|d||   }|t=        |j^                  |         z  }| xjJ                  tg        d| ||      gz  c_%        |}	|jh                  }| jH                  jk                  |        |	x| _6        | _7         ||	      | _8        t+        jr                  tu        dt+        jv                  | j                        fd|dkD  r t+        jx                  | jl                  |      nt+        jz                         fg            | _>        | j8                  t        | j8                  d       | j2                  t        | j2                  d       | j                  | j                         y c c}w )!N)epstokenr   r   )rx   ry   rz   rr   rs   r   r   ry   r$  r%   r   r'   r*   r/   r(   r)   r   r   r4   r5   r   rA   r-   rE   r
  zblock.)modulenum_chs	reductiondropfcr   r   rg   )Brt   ru   r   r   r   rE   rF   r0  r2  r+   r/  rU   r"   r)   r$   ro   r;   r<   r=   patch_embedrm   r   r   r   r   	cls_tokennum_prefix_tokensr,   	pos_embedrS   r>   linspacesumsplittolistr!  stagesfeature_inforW   rX   r  r%   r'   r*   r/   r(   r1   r4   r5   rA   r-   dictr   rY   num_featureshead_hidden_sizenorm
Sequentialr   Dropoutr   r  headr   apply_init_weights)rM   r,  r-  r.  r/  r0  r1  r2  rE   r$   
patch_dimsnum_patchespos_embed_dimrZ   r   curr_strider   dprrL   ry   stager{   s                        rN   ru   zMultiScaleVit.__init__  se    	X&^CNN;N
&"%(%6%6'EK&CJJ'??MM!$	%####%%
 qkS%5%5a%88(1+IYIYZ[I\:\]
:&\\%++aI*FGDN%&D"'!OM%&D"!DN'M??\\%++a	*RSDN!DN'
	#**+#(>>!^S_#U#[#[\_\f\f#ghaqxxzhhmmoz" 	&A--*--AE:>(BC&  jjm --*	
 $ --  XX >>  OO  .. a --* "//  !--!" "%!5!5#$ &%& a&'E* 3s||A//K$qclGWb"c!ddIIKKu%=	&@ 5>=D1y)	MM+RZZ/0a299T..<UWU`U`Ubc/
 # 	
 >>%T^^6>>%T^^6

4%%&] is   S c                    t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       y y y y )Nr   r   r   )rT   r   r   r   weightr   init	constant_)rM   ms     rN   rM  zMultiScaleVit._init_weights   sZ    a#QXX40!RYY'AFF,>!!!&&#. -?' $rh   c                     | j                         D ch c]  \  }t        fddD              r c}}S c c}}w )Nc              3   &   K   | ]  }|v  
 y wr  rg   )rK   nr   s     rN   rO   z0MultiScaleVit.no_weight_decay.<locals>.<genexpr>)  s     \!qAv\s   )r>  r   r   r<  )named_parametersany)rM   r   r   s    ` rN   no_weight_decayzMultiScaleVit.no_weight_decay&  sH    "335 ^ ^da\'[\\  ^ 	^ ^s    :c                 $    t        dddg      }|S )Nz^patch_embed)z^stages\.(\d+)N)z^norm)i )stemr"  )rE  )rM   coarsematchers      rN   group_matcherzMultiScaleVit.group_matcher+  s     -/CD
 rh   c                 4    | j                   D ]	  }||_         y r  )rC  r   )rM   enabler  s      rN   set_grad_checkpointingz$MultiScaleVit.set_grad_checkpointing3  s     	*A#)A 	*rh   r|   c                 .    | j                   j                  S r  )rK  r:  )rM   s    rN   get_classifierzMultiScaleVit.get_classifier8  s    yy||rh   c           
         || _         ||| _        t        j                  t	        dt        j
                  | j                        fd|dkD  r t        j                  | j                  |      nt        j                         fg            | _
        y )Nr9  r:  r   )r0  r/  r   rI  r   rJ  r2  r   rF  r  rK  )rM   r0  r/  s      rN   reset_classifierzMultiScaleVit.reset_classifier<  st    &"*DMM+RZZ/0a299T..<UWU`U`Ubc/
 # 	rh   r   indicesrH  
stop_early
output_fmtintermediates_onlyc                    |dv sJ d       |dk(  }g }t        t        | j                        |      \  }	}
| j                  |      \  }}|j                  d   }| j
                  6| j
                  j                  |dd      }t        j                  ||fd      }| j                  || j                  z   }t        | j                        dz
  }t        | j                        D ]  \  }} |||      \  }}||	v s|r||k(  r| j                  |      }n|}|rC| j
                  |ddddf   }|j                  ||d   |d   d      j                  dd	dd
      }|j                  |        |r|S |k(  r| j                  |      }||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )NCHWNLCz!Output shape must be NCHW or NLC.rp  r   Nr   r   r   r    r   )r   rS   rC  r;  r   r<  expandr   r   r>  	enumeraterH  r   r   rY   )rM   r   rk  rH  rl  rm  rn  r   intermediatestake_indices	max_indexr   r   
cls_tokenslast_idxfeat_idxrS  x_inters                     rN   forward_intermediatesz#MultiScaleVit.forward_intermediatesE  s   * _,Q.QQ,&"6s4;;7G"Qi ''*9GGAJ>>%..q"b9J		:q/q1A>>%DNN"At{{#a'(5 	.OHe I.LAy<'H0"iilGG~~1")!QR%.%ooa1y|RPXXYZ\]_`bcdG$$W-	.   x		!A-rh   
prune_norm
prune_headc                     t        t        | j                        |      \  }}|rt        j                         | _        |r| j                  dd       |S )z@ Prune layers not required for specified intermediates.
        r    )r   rS   rC  r   r  rH  rj  )rM   rk  r|  r}  ru  rv  s         rN   prune_intermediate_layersz'MultiScaleVit.prune_intermediate_layers  sI     #7s4;;7G"Qi DI!!!R(rh   c                 d   | j                  |      \  }}|j                  \  }}}| j                  6| j                  j                  |dd      }t	        j
                  ||fd      }| j                  || j                  z   }| j                  D ]  } |||      \  }} | j                  |      }|S )Nr   r   r   )	r;  r   r<  rr  r   r   r>  rC  rH  )rM   r   r   r   r   r  rw  rS  s           rN   forward_featureszMultiScaleVit.forward_features  s    ''*9''1a>>%..q"b9J		:q/q1A>>%DNN"A[[ 	/E I.LAy	/ IIaLrh   
pre_logitsc                     | j                   r=| j                   dk(  r%|d d | j                  d f   j                  d      }n	|d d df   }|r|S | j                  |      S )Nr   r   r   )r/  r=  meanrK  )rM   r   r  s      rN   forward_headzMultiScaleVit.forward_head  s^    5(a//00166q9adGq0DIIaL0rh   c                 J    | j                  |      }| j                  |      }|S r  )r  r  r   s     rN   r   zMultiScaleVit.forward  s'    !!!$a rh   ))   r  r    N  r   r   FTr  )NFFrp  F)r   FT) r_   r`   ra   r   r   r	   rb   r
   rf   rd   ru   rM  r   r(  ignorer^  rc  rf  r   Modulerh  rj  r   r   r   re   r{  r  r  r  r   r   r   s   @rN   r   r     s   
 )3)-#$&!_'!_' CHo_' 	_'
 "#_' _' "_' _'B/ YY^ ^ YY  YY* * YY		  C hsm  8<$$',9 ||9  eCcN349  	9 
 9  9  !%9  
tELL!5tELL7I)I#JJ	K9 z ./$#	3S	>*  	""1$ 1rh   c           
         d| v r| j                         D ]  }d|v s| |   }|j                         |   j                  }|j                  d   |d   k7  s@t        j                  j
                  j                  |j                  d|j                  d   d      j                  ddd      |d   d      }|j                  d|d         j                  dd      | |<    | S dd l	}d	| v r| d	   } t        |d
d       }t        |dd      }|J d       i d}	t        |      D ]<  \  }
}j                  t        |	|	|z         D ci c]
  }||
||	z
  f c}       |	|z  }	> i }| j                         D ]_  \  }}|j                  dfd|      }|r|j                  dd|      }n|j                  dd|      }d|v r|j!                  dd      }|||<   a |S c c}w )Nzstages.0.blocks.0.norm1.weightrel_posr   r   r   r   linear)r   r/   model_stater"   r)   Tz3model requires depth attribute to remap checkpointszblocks\.(\d+)c           	          dt        | j                  d               d    dt        | j                  d               d    S )Nzstages.r   r   z.blocks.)rb   group)r   	depth_maps    rN   <lambda>z&checkpoint_filter_fn.<locals>.<lambda>  sJ    	#aggaj/ :1 =>hyQTUVU\U\]^U_Q`GabcGdFef rh   z stages\.(\d+).blocks\.(\d+).projz&stages.\1.blocks.\2.shortcut_proj_attnz%stages.\1.blocks.\2.shortcut_proj_mlprK  zhead.projectionhead.fc)keys
state_dictr   r   r   
functionalinterpolater   r   regetattrrs  updaterW   itemssubreplace)r  modelr   r  dest_rel_pos_shaperel_pos_resizedr  r"   r)   	block_idx	stage_idxr]   rL   out_dictr   r  s                  @rN   checkpoint_filter_fnr    s'   ':5" 
	eAA~$Q-%*%5%5%7%:%@%@"==#'9!'<<&+hh&9&9&E&E7==+;R@HHAqQ/2% 'F 'O
 %4$;$;B@RST@U$V$^$^_`bc$dJqM
	e 
".
UHd+F%5KTTTII!&) 	1yR[^_R_A`aA!iY77abQ	 H  " 1FFf
 :>fijkA:>ehijAQ;		+Y7A O% bs   G
)r   r      r   )r"   )r   r      r   r   )r      $   r7      r   F)r"   r$   r%   r)   )r"   r+   )r"   r$   r%   r+   r)   )r7   r   <   r      r    )mvitv2_tinymvitv2_smallmvitv2_basemvitv2_largemvitv2_small_clsmvitv2_base_clsmvitv2_large_clsmvitv2_huge_clsc           	          |j                  dd      }t        t        | |f|s	t        |    nt        |   t        t        |d      d|S )Nout_indicesr7   getter)r  feature_cls)	model_cfgpretrained_filter_fnfeature_cfg)popr   r   
model_cfgsr  rE  )variantcfg_variant
pretrainedkwargsr  s        rN   _create_mvitv2r  	  sW    **]A.K .9*W%j>U1[hG  rh   r  c                 2    | ddd ddt         t        dddd|S )	Nr  )r    r  r  g?bicubiczpatch_embed.projr  T)urlr0  
input_size	pool_sizecrop_pctinterpolationr  r   
first_conv
classifierfixed_input_sizer   )r  r  s     rN   _cfgr    s3    =t%.B(	   rh   zDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_T_in1k.pythztimm/)r  	hf_hub_idzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_S_in1k.pythzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in1k.pythzDhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in1k.pyth)r  zEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_B_in21k.pythiJ  )r  r  r0  zEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_L_in21k.pythzEhttps://dl.fbaipublicfiles.com/mvit/mvitv2_models/MViTv2_H_in21k.pyth)zmvitv2_tiny.fb_in1kzmvitv2_small.fb_in1kzmvitv2_base.fb_in1kzmvitv2_large.fb_in1kr  zmvitv2_base_cls.fb_inw21kzmvitv2_large_cls.fb_inw21kzmvitv2_huge_cls.fb_inw21kc                     t        dd| i|S )Nr  )r  r  r  r  s     rN   r  r  =      IJI&IIrh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  B      JZJ6JJrh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  G  r  rh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  L  r  rh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  Q      NNvNNrh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  V      M
MfMMrh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  [  r  rh   c                     t        dd| i|S )Nr  )r  r  r  s     rN   r  r  `  r  rh   r  r  )NF)r  r  )Br   rj   collectionsr   dataclassesr   	functoolsr   r   typingr   r   r	   r
   r   r   	timm.datar   r   timm.layersr   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   	_registryr   r   __all__r   rm   r  ro   rb   re   r   r   r   r   r   r   r   r  r   r  rE  r  r  r  default_cfgsr  r  r  r  r  r  r  r  rg   rh   rN   <module>r     s
    # ! % / /   A R R * + 3 # <.
/ -3 -3 -3`-: ::  #9  5<<%,,//0	   +/

 %,,'
 5<<c"#	
 
 /ll/<</ / S		/
 S	/ <</ <</ /dg299 gTV")) Vr` bii ` FC CL{BII {|,^   " ! "	 & % & %A'
T
	 %R !%k$j %k !%S" #'S# "&S")& 6 J} J J K K K J} J J K K K OM O O N= N N OM O O N= N Nrh   