
    khO                    =   d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZmZmZmZ 	 ddl	mZ ddlZddlmZ ddlmc mZ ddlmZ ddlmZm Z m!Z!m"Z"m#Z#m$Z$ dd	l%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8 d
dl9m:Z: d
dl;m<Z< d
dl=m>Z>m?Z?m@Z@mAZA d
dlBmCZCmDZDmEZE dgZF ej                  eH      ZI G d dej                        ZK G d dej                        ZL G d dej                        ZM G d dej                        ZN G d dej                        ZO	 	 	 ddej                  deQdeRdeSfdZT G d dej                        ZUdd!ej                  d"eQd#dfd$ZVdd!ej                  d"eQd%eWd#dfd&ZXdd!ej                  d"eQd#dfd'ZYdd(eQd%eWd#efd)ZZ	 	 	 	 dd+ej                  d,ej                  deRd-eeReRf   d.eQd/eSd#ej                  fd0Z[ ej                         dd1eUd2eQd3eQd4eSd#df
d5       Z]	 dd6eeQej                  f   d1eUd3eQd#eeQej                  f   fd7Z^d6eeQej                  f   d1eUd#eeQej                  f   fd8Z_d6eeQej                  f   d1eUd#eeQej                  f   fd9Z`d6eafd:Zb	 	 	 dd6eeQej                  f   d1eUd<eSd.eQd/eSd#eeQej                  f   fd=Zcdd>eQd#eeQe
f   fd?Zdi d@ eddAB      dC ed       dD eddAB      dE eddFdAd;G      dH eddIdAd;dJdKL      dM eddNdAd;G      dO eddPdAd;dJdKL      dQ eddRdAd;G      dS eddTdAd;dJdKL      dU eddVdAd;G      dW eddXdAd;dJdKL      dY eddZdAd;G      d[ edd\dAd;dJdKL      d] edd^dAd;G      d_ edd`dAd;G      da eddbdAd;dJdKL      dc eddddAe      i df eddgdAdJdKh      di eddjdAdJdKh      dk eddldAd;G      dm eddndAd;dJdKL      do eddpdAd;G      dq eddrdAd;dJdKL      ds eddtdAd;G      du eddvdAd;dJdKL      dw edd x      dy edd x      dz edd x      d{ edd x      d| eddAd}      d~ eddAd}      d eddAd}      d eddAd}      d eddAd}      i d edddAd;d      d edddAd;d      d edddAd;d      d edddAd;d      d edddAd;d      d edddAd;d      d edddAd;d      d eddd;dA      d eddd;dA      d edddAee d      d edddAee d      d edddAee d      d edddAee d      d edddAdee dddK      d edddAdee dddK      d edddAdee dddK      d edddAdee dddK      i d edddAdee dddK      d edddAdee dddK      d edddAdee dddK      d edddAdee dddK      d edddAddddd      d edddAdddd      d edddAe      d eddAdddìī      d eddAdd¬ǫ      d eddAdJddɬʫ      d ed       d eddAe#e$ͫ      d eddAe#e$dKdJϫ      d eddAe#e$dKdѬϫ      d eddAe#e$d¬ӫ      d eddAe#e$dKdJdɬի      d eddAe!e"dKӫ      i d eddAe!e"dKddɬի      d eddAe#e$dKӫ      d eddAe#e$dKddɬի      d ede#e$ܫ      d eddAe#e$ddJdɬի      d eddAe#e$d¬ӫ      d eddAe#e$ddJdɬի      d eddAe#e$dKӫ      d eddAe#e$dKddɬի      d eddAe#e$ͫ      d eddAe#e$dKӫ      d eddAe#e$dKdJdɬի      d eddAe!e"dKӫ      d eddAe!e"dKddɬի      d eddAe#e$dKӫ      d edd e#e$dKddɬի      d eddAe#e$ͫ      i d eddAe#e$ͫ      d eddAe#e$dKdJdɬի      d eddAe#e$dKӫ      d eddAe#e$dì      d eddAe!e"dKdì      d eddAe#e$dKdì      d eddAe#e$dì      d eddAe#e$dKdì      d eddAe#e$d      d eddAe#e$dKd      d eddAe!e"dKd      d eddAe#e$dKd      d eddAe#e$dKd      d eddAe#e$dKd      d eddAde#e$d       d eddAe#e$dKd      d eddAe#e$ddKd      i d eddAe#e$dKd      d eddAe#e$dKd      d eddAe#e$dKdd      d	 eddAe#e$dKd      d
 eddAe#e$dKd      d eddAde#e$dKd      d eddAde#e$dKd      d eddAdde#e$dKd      d eddAdde#e$dKd      d eddAe#e$dddKdd      d eddAdde#e$dKd      d eddAdde#e$dKd      d eddAdde#e$dKd      d eddAdde#e$dKd      d eddAde#e$dKd      d eddAdde#e$dKd      d eddAdde#e$dKd      i d eddAdde#e$dKd      d eddAdde#e$dKd      d eddAde#e$d       d  eddAde#e$d       d! eddAde#e$dKd"      d# eddAde#e$dKdd$      d% edd dd&      d' edd dd&      d( edd x      d) edd x      d* edd x      d+ eddAd,e#e$d-dK.      d/ eddAd,e#e$ddKdɐ0      d1 eddAd,e#e$d-dK.      d2 eddAd,e#e$ddKdɐ0      d3 edd4d;dAdd5      d6 edd7d;dAdd5      i d8 edd9d;dAdd5      d: edd;d;dAdd5      d< edd=d;dAdd5      d> edd?d;dAdd5      d@ eddAd;dAdddB      dC eddDd;dAdddB      dE eddFd;dAdd5      dG eddHd;dAdd5      dI eddJd;dAdd5      dK eddLd;dAdddB      dM eddNd;dAdddB      dO edd x      dP edd x      dQ edd x      dR eddSdAdee dT      dU eddVdAdee dT      dW eddXdAdee dT      i dY eddZdee d[      d\ edd]dee d[      d^ edd_dddKee d`      da eddbdee d[      dc eddAddd      de eddAd}      df eddAd}      dg eddAddd      dh eddAddd      di eddAddd      dj eddAdJdd      dk eddAdJdd      dl eddAdmdd      dn eddAdmdd      do eddAddd      dp eddAddd      dq eddAdJdd      i dr eddAdJdd      ds eddAdmdd      dt eddAd}      du eddAd}      dv eddAddd      dw eddAddd      dx eddAdJdd      dy eddAddd      dz eddAddd      d{ eddAdJdd      d| eddAdmdd      d} eddAddd      d~ eddAdJdd      d eddAddd      d eddAd}      d eddAd}      d eddAddd      i d eddAddd      d eddAddd      d eddAdJdd      d eddAdJdd      d eddAdmdd      d eddAdmdd      d eddAddd      d eddAddd      d eddAdJdd      d eddAdJdd      d eddAdmdd      d eddAd}      d eddAd}      d eddAd}      d eddAd}      d eddAd}      d eddAd}      i d eddAddd      d eddAddKdī      d eddAdJdKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddKdī      d eddAddd      i d eddAddd      d eddAdJdd      d eddAdmdd      d eddAddd      d eddAdJdd      d eddAddKdɬʫ      d eddAddKdɬʫ      d eddAd,e#e$d      d eddAd,e#e$d      d eddAd,e#e$d      d eddAd,e#e$d      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAddd      d eddAdd¬ǫ      d eddAdd¬ǫ      i d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAddd      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAddd      d eddAddd      d eddAdJdKǫ      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAddd      d eddAddd      d eddAdJdKǫ      d eddƐɫ      d eddAdd¬ǫ      i d eddAddd      d eddAdJdKǫ      d eddƐɫ      d eddAddKǫ      d eddAdddK      d eddAdJdKǫ      d eddAddKdɬʫ      d eddAee ddKd      d eddAee ddKd      d eddAe#e$ddKdի      d eddAe#e$ddKdի      d eddAe#e$ddKdի      d eddAe#e$ddKdի      d eddAe#e$ddKdի      d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      i d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      d eddAe#e$dddKd۫      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddAdd¬ǫ      d eddd      d eddAee dKӫ      d eddAee dKӫ      d eddAee dKӫ      d eddAee dKӫ      d edd ee dK      d edd dee dK      d eddAee dKd       eddAee dKd       eddAee dKd       eddAee dKd      dZeeej                         D  cg c](  \  } }|j                  dd*      sd|d   d   v s'| * c}} ZhehD ]@  Zi ej                  eeei         Zkekd   dAk(  r	dAeiz   ekd<   ekeeeij                  dd      <   B  eCee      Zeej                  j                  dd      j                         dk(  Zo	 	 ddeQdeSdeeS   d#eeUdf   fd ZpeDddeSd#eUfd       ZqeDddeSd#eUfd       ZreDddeSd#eUfd       ZseDddeSd#eUfd       ZteDddeSd#eUfd       ZueDddeSd#eUfd       ZveDddeSd#eUfd       ZweDddeSd#eUfd       ZxeDddeSd#eUfd	       ZyeDddeSd#eUfd
       ZzeDddeSd#eUfd       Z{eDddeSd#eUfd       Z|eDddeSd#eUfd       Z}eDddeSd#eUfd       Z~eDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd        ZeDddeSd#eUfd!       ZeDddeSd#eUfd"       ZeDddeSd#eUfd#       ZeDddeSd#eUfd$       ZeDddeSd#eUfd%       ZeDddeSd#eUfd&       ZeDddeSd#eUfd'       ZeDddeSd#eUfd(       ZeDddeSd#eUfd)       ZeDddeSd#eUfd*       ZeDddeSd#eUfd+       ZeDddeSd#eUfd,       ZeDddeSd#eUfd-       ZeDddeSd#eUfd.       ZeDddeSd#eUfd/       ZeDddeSd#eUfd0       ZeDddeSd#eUfd1       ZeDddeSd#eUfd2       ZeDddeSd#eUfd3       ZeDddeSd#eUfd4       ZeDddeSd#eUfd5       ZeDddeSd#eUfd6       ZeDddeSd#eUfd7       ZeDddeSd#eUfd8       ZeDddeSd#eUfd9       ZeDddeSd#eUfd:       ZeDddeSd#eUfd;       ZeDddeSd#eUfd<       ZeDddeSd#eUfd=       ZeDddeSd#eUfd>       ZeDddeSd#eUfd?       ZeDddeSd#eUfd@       ZeDddeSd#eUfdA       ZeDddeSd#eUfdB       ZeDddeSd#eUfdC       ZeDddeSd#eUfdD       ZeDddeSd#eUfdE       ZeDddeSd#eUfdF       ZeDddeSd#eUfdG       ZeDddeSd#eUfdH       ZeDddeSd#eUfdI       ZeDddeSd#eUfdJ       ZeDddeSd#eUfdK       ZeDddeSd#eUfdL       ZeDddeSd#eUfdM       ZeDddeSd#eUfdN       ZeDddeSd#eUfdO       ZeDddeSd#eUfdP       ZeDddeSd#eUfdQ       ZeDddeSd#eUfdR       ZeDddeSd#eUfdS       ZeDddeSd#eUfdT       ZeDddeSd#eUfdU       ZeDddeSd#eUfdV       ZeDddeSd#eUfdW       ZeDddeSd#eUfdX       ZeDddeSd#eUfdY       ZeDddeSd#eUfdZ       ZeDddeSd#eUfd[       ZeDddeSd#eUfd\       ZeDddeSd#eUfd]       ZeDddeSd#eUfd^       ZeDddeSd#eUfd_       ZeDddeSd#eUfd`       ZeDddeSd#eUfda       ZeDddeSd#eUfdb       ZeDddeSd#eUfdc       ZeDddeSd#eUfdd       ZeDddeSd#eUfde       ZeDddeSd#eUfdf       ZeDddeSd#eUfdg       ZeDddeSd#eUfdh       ZeDddeSd#eUfdi       ZeDddeSd#eUfdj       ZeDddeSd#eUfdk       ZeDddeSd#eUfdl       ZeDddeSd#eUfdm       ZeDddeSd#eUfdn       ZeDddeSd#eUfdo       ZeDddeSd#eUfdp       ZeDddeSd#eUfdq       ZeDddeSd#eUfdr       ZeDddeSd#eUfds       ZeDddeSd#eUfdt       ZeDddeSd#eUfdu       ZeDddeSd#eUfdv       ZeDddeSd#eUfdw       ZeDddeSd#eUfdx       ZeDddeSd#eUfdy       ZeDddeSd#eUfdz       ZeDddeSd#eUfd{       ZeDddeSd#eUfd|       ZeDddeSd#eUfd}       ZeDddeSd#eUfd~       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       Z eDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       ZeDddeSd#eUfd       Z eEeHi dddddddddddddddddddddddddddddddddddddd       y# e$ r
 ddlmZ Y {w xY wc c}} w (  a   Vision Transformer (ViT) in PyTorch

A PyTorch implement of Vision Transformers as described in:

'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale'
    - https://arxiv.org/abs/2010.11929

`How to train your ViT? Data, Augmentation, and Regularization in Vision Transformers`
    - https://arxiv.org/abs/2106.10270

`FlexiViT: One Model for All Patch Sizes`
    - https://arxiv.org/abs/2212.08013

The official jax code is released and available at
  * https://github.com/google-research/vision_transformer
  * https://github.com/google-research/big_vision

Acknowledgments:
  * The paper authors for releasing code and weights, thanks!
  * I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch
  * Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT
  * Bert reference code checks against Huggingface Transformers and Tensorflow Bert

Hacked together by / Copyright 2020, Ross Wightman
    N)OrderedDict)partial)	AnyCallableDictOptionalSetTupleTypeUnionList)Literal)Final)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STDIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STDOPENAI_CLIP_MEANOPENAI_CLIP_STD)	AttentionAttentionPoolLatent
PatchEmbedMlpSwiGLUPackedSwiGLU	LayerNormRmsNormDropPathPatchDropouttrunc_normal_lecun_normal_resample_patch_embedresample_abs_pos_embeduse_fused_attnget_act_layerget_norm_layermaybe_add_mask	LayerType   )build_model_with_cfg)feature_take_indices)named_apply
checkpointcheckpoint_seqadapt_input_conv)generate_default_cfgsregister_modelregister_model_deprecationsVisionTransformerc            	       r     e Zd ZdZ	 	 d
dedededdf fdZdej                  dej                  fd	Z
 xZS )
LayerScalezRLayer scale module.

    References:
      - https://arxiv.org/abs/2103.17239
    diminit_valuesinplacereturnNc                     t         |           || _        t        j                  |t        j                  |      z        | _        y)zInitialize LayerScale module.

        Args:
            dim: Dimension.
            init_values: Initial value for scaling.
            inplace: If True, perform inplace operations.
        N)super__init__r8   nn	Parametertorchonesgamma)selfr6   r7   r8   	__class__s       Z/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/vision_transformer.pyr<   zLayerScale.__init__W   s4     	\\+

3"?@
    xc                 n    | j                   r|j                  | j                        S || j                  z  S )zApply layer scaling.)r8   mul_rA   )rB   rF   s     rD   forwardzLayerScale.forwardh   s(    %)\\qvvdjj!Eq4::~ErE   )h㈵>F)__name__
__module____qualname____doc__intfloatboolr<   r?   TensorrI   __classcell__rC   s   @rD   r5   r5   P   s^     "&!	AA A 	A
 
A"F F%,, FrE   r5   c            !       @    e Zd ZdZddddddddddej
                  eefdeded	e	d
e
de
de
de
de
de	de	dee	   de	deej                     deej                     deej                     ddf  fdZddej                   deej                      dej                   fdZ xZS )Blockz)Transformer block with pre-normalization.      @FT        Nr6   	num_heads	mlp_ratioqkv_biasqk_normscale_attn_normscale_mlp_norm	proj_bias	proj_drop	attn_dropr7   	drop_path	act_layer
norm_layer	mlp_layerr9   c                    t         |            ||      | _        t        |||||||
|	|	      | _        |rt        ||      nt        j                         | _        |dkD  rt        |      nt        j                         | _
         ||      | _         ||t        ||z        ||r|nd||	      | _        |rt        ||      nt        j                         | _        |dkD  rt        |      | _        yt        j                         | _        y)a  Initialize Block.

        Args:
            dim: Number of input channels.
            num_heads: Number of attention heads.
            mlp_ratio: Ratio of mlp hidden dim to embedding dim.
            qkv_bias: If True, add a learnable bias to query, key, value.
            qk_norm: If True, apply normalization to query and key.
            proj_bias: If True, add bias to output projection.
            proj_drop: Projection dropout rate.
            attn_drop: Attention dropout rate.
            init_values: Initial values for layer scale.
            drop_path: Stochastic depth rate.
            act_layer: Activation layer.
            norm_layer: Normalization layer.
            mlp_layer: MLP layer.
        rY   r[   r\   
scale_normr_   ra   r`   rd   r7   rX   Nin_featureshidden_featuresrc   rd   biasdrop)r;   r<   norm1r   attnr5   r=   Identityls1r   
drop_path1norm2rO   mlpls2
drop_path2rB   r6   rY   rZ   r[   r\   r]   r^   r_   r`   ra   r7   rb   rc   rd   re   rC   s                   rD   r<   zBlock.__init__p   s    F 	_
&!

	 @K:c{;PRP[P[P]1:R(9-R[[]_
i0%3z
 @K:c{;PRP[P[P]1:R(9-R[[]rE   rF   	attn_maskc           
         || j                  | j                  | j                  | j                  |      |                  z   }|| j	                  | j                  | j                  | j                  |                        z   }|S Nry   )rs   rr   rp   ro   rw   rv   ru   rt   rB   rF   ry   s      rD   rI   zBlock.forward   se    4::a=I)V WXX$**Q-)@ ABBrE   NrK   rL   rM   rN   r=   GELUr   r   rO   rP   rQ   r   r   Moduler<   r?   rR   rI   rS   rT   s   @rD   rV   rV   m   s2   3  ""!$)#("!!+/!)+*3),!=S=S =S 	=S
 =S =S "=S !=S =S =S =S "%=S =S BII=S RYY=S  BII!=S" 
#=S~ (5<<2H TYT`T` rE   rV   c            !       D    e Zd Zddddddddddej                  eefdededed	e	d
e	de	de	de	dedede
e   dedeej                     deej                     deej                     ddf  fdZddZddej                   de
ej                      dej                   fdZ xZS )ResPostBlockrW   FTrX   Nr6   rY   rZ   r[   r\   r]   r^   r_   r`   ra   r7   rb   rc   rd   re   r9   c                    t         |           || _        t        |||||||
|	|	      | _         ||      | _        |dkD  rt        |      nt        j                         | _	         ||t        ||z        ||r|nd ||	      | _         ||      | _        |dkD  rt        |      nt        j                         | _        | j                          y )Nrg   rX   rj   )r;   r<   r7   r   rp   ro   r   r=   rq   rs   rO   ru   rt   rw   init_weightsrx   s                   rD   r<   zResPostBlock.__init__   s    $ 	&&!

	  _
1:R(9-R[[]i0%3z
  _
1:R(9-R[[]rE   c                    | j                   }t        j                  j                  | j                  j
                  | j                          t        j                  j                  | j                  j
                  | j                          y y r~   )r7   r=   init	constant_ro   weightrt   rB   s    rD   r   zResPostBlock.init_weights   s[    'GGdjj//1A1ABGGdjj//1A1AB (rE   rF   ry   c           	          || j                  | j                  | j                  ||                  z   }|| j                  | j	                  | j                  |                  z   }|S r{   )rs   ro   rp   rw   rt   ru   r}   s      rD   rI   zResPostBlock.forward   sT    

499Q)9+L MNN

488A; 788rE   r9   Nr~   )rK   rL   rM   r=   r   r   r   rO   rP   rQ   r   r   r   r<   r   r?   rR   rI   rS   rT   s   @rD   r   r      s$   
  ""!$)#("!!+/!)+*3),!.. . 	.
 . . ". !. . . . "%. . BII. RYY.  BII!." 
#.`C (5<<2H TYT`T` rE   r   c            !       X    e Zd ZU dZee   ed<   ddddddddddej                  e	dfde
d	e
d
ededededededededee   dedeej                     deej                     deeej                        ddf  fdZddej"                  deej"                     dej"                  fdZ xZS )ParallelScalingBlockz Parallel ViT block (MLP & Attention in parallel)
    Based on:
      'Scaling Vision Transformers to 22 Billion Parameters` - https://arxiv.org/abs/2302.05442
    
fused_attnrW   FTrX   Nr6   rY   rZ   r[   r\   r]   r^   r_   r`   ra   r7   rb   rc   rd   re   r9   c                    t         |           ||z  dk(  sJ d       |s|rJ d       || _        ||z  | _        | j                  dz  | _        t               | _        t        ||z        }|d|z  z   } ||      | _        t        j                  |||      | _        |g|gdz  z   | _        |r%| j                  dd        | j                  dd        nW| j                  dt        j                   d|z        d	
       t        j"                  t        j                   |            | _        |r || j                        nt        j&                         | _        |r || j                        nt        j&                         | _        t        j,                  |
      | _        t        j                  |||      | _        t        j,                  |	      | _         |       | _        t        j                  |||      | _        |t9        ||      nt        j&                         | _        |dkD  rt=        |      | _        y t        j&                         | _        y )Nr   z$dim should be divisible by num_headszScale norms not supportedg         )rm   r[   mlp_biasF)
persistentri   rX   ) r;   r<   rY   head_dimscaler$   r   rO   in_normr=   Linearin_projin_splitregister_bufferregister_parameterr?   zerosr>   r   rq   q_normk_normDropoutra   attn_out_projmlp_dropmlp_actmlp_out_projr5   lsr   rb   )rB   r6   rY   rZ   r[   r\   r]   r^   r_   r`   ra   r7   rb   rc   rd   re   mlp_hidden_dimin_proj_out_dimrC   s                     rD   r<   zParallelScalingBlock.__init__   s   $ 	Y!#K%KK#">V;VV9"y(]]d*
(*Y_-(1s72!#yyoHE'(C5194  T2##J5  U[[S-Ae TLL^)DEDM3:j/3:j/I.YYsCi@

9- {IInc	J>I>U*Sk:[][f[f[h09B),BKKMrE   rF   ry   c                 f   |j                   \  }}}| j                  |      }| j                  Ut        j                  || j
                  j                  t        j                  | j                  | j                  f            }n| j                  |      }t        j                  || j                  d      \  }}}	}
| j                  |j                  ||| j                  | j                              j!                  dd      }| j#                  |	j                  ||| j                  | j                              j!                  dd      }	|
j                  ||| j                  | j                        j!                  dd      }
| j$                  r=t        j&                  ||	|
|| j(                  r| j*                  j,                  nd      }nX|| j.                  z  }||	j!                  dd      z  }t1        ||      }|j3                  d      }| j+                  |      }||
z  }|j!                  dd      j5                  |||      }| j7                  |      }| j9                  |      }| j;                  |      }| j=                  |      }| j?                  | jA                  ||z               }||z   }|S )Nr6   r)      rX   )ry   	dropout_p)!shaper   r   Flinearr   r   r?   catr[   splitr   r   viewrY   r   	transposer   r   scaled_dot_product_attentiontrainingra   pr   r'   softmaxreshaper   r   r   r   rb   r   )rB   rF   ry   BNCyx_mlpqkvx_attnrp   s                rD   rI   zParallelScalingBlock.forward+  s1   ''1a LLO==$ DLL//DMM4==;Y1Z[AQAQ2>q!Q KKq!T^^T]]CDNNqRSTKKq!T^^T]]CDNNqRSTFF1a7AA!QG??331a#.2mm$..**F DJJAq{{2r**D!$	2D<<B<'D>>$'DAXF!!!Q'//1a8##F+ U#e$!!%( NN4776E>23ErE   r~   )rK   rL   rM   rN   r   rQ   __annotations__r=   r   r   rO   rP   r   r   r   r<   r?   rR   rI   rS   rT   s   @rD   r   r      sE    d  ""!$)#("!!+/!)+*337!0R0R 0R 	0R
 0R 0R "0R !0R 0R 0R 0R "%0R 0R BII0R RYY0R   RYY0!0R" 
#0Rd* *(5<<2H *TYT`T` *rE   r   c            #       F    e Zd ZdZdddddddddddej
                  eefded	ed
ede	de
de
de
de
de
dee	   de	de	de	deej                     deej                     deej                     ddf" fdZddej                   deej                      dej                   fdZ xZS )ParallelThingsBlockz Parallel ViT block (N parallel attention followed by N parallel MLP)
    Based on:
      `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    r   rW   FTNrX   r6   rY   num_parallelrZ   r[   r\   r]   r^   r_   r7   r`   ra   rb   rc   rd   re   r9   c                 D   t         |           || _        t        j                         | _        t        j                         | _        t        |      D ]I  }| j
                  j                  t        j                  t        d ||      fdt        ||||||	|||	      fd|
rt        ||
      nt        j                         fd|dkD  rt        |      nt        j                         fg                   | j                  j                  t        j                  t        d ||      fd ||t        ||z        ||r|nd |	|	      fd|
rt        ||
      nt        j                         fd|dkD  rt        |      nt        j                         fg                   L y )
Nnormrp   rg   r   ri   rb   rX   ru   )rl   rc   rd   rm   rn   )r;   r<   r   r=   
ModuleListattnsffnsrangeappend
Sequentialr   r   r5   rq   r   rO   )rB   r6   rY   r   rZ   r[   r\   r]   r^   r_   r7   r`   ra   rb   rc   rd   re   _rC   s                     rD   r<   zParallelThingsBlock.__init__]  s~   & 	(]]_
MMO	|$ 	AJJbmmKC)'%#.''')
 
 ;z#;?TVT_T_TabY^hy1W9 -    IIR]];C)	$'i$8'-;z""  ;z#;?TVT_T_TabY^hy1W8 ,  #	rE   rF   ry   c           	      \   |g }| j                   D ]Y  }|j                  |      }|j                  ||      }|j                  |      }|j	                  |      }|j                  |       [ |t        j                  |      j                  d      z   }nF|t        j                  | j                   D cg c]
  } ||       c}      j                  d      z   }|t        j                  | j                  D cg c]
  } ||       c}      j                  d      z   }|S c c}w c c}w )Nr|   r   r   )
r   r   rp   r   rb   r   r?   stacksumr   )rB   rF   ry   attn_outrp   r   ffns          rD   rI   zParallelThingsBlock.forward  s     H

 (16Y?/'( EKK)--!-44AEKKTZZ @Ta @AEE!ELLAtyy9SV9:>>1>EE !A9s   4D$
:D)
r~   r   rT   s   @rD   r   r   X  s0    !"!"!$)#("+/!!!)+*3),#44 4 	4
 4 4 4 "4 !4 4 "%4 4 4 4 BII4  RYY!4" BII#4$ 
%4l (5<<2H TYT`T` rE   r   rF   	pool_typenum_prefix_tokensreduce_include_prefixc                 &   |s| S |dk(  r| d d df   } | S |r| n
| d d |d f   } |dk(  r| j                  d      } | S |dk(  r*d| j                  d      | j                  d      z   z  } | S |dk(  r| j                  d      } | S |r
J d	|        | S )
Ntokenr   avgr)   r   avgmaxg      ?maxzUnknown pool type )meanamax)rF   r   r   r   s       rD   global_pool_nlcr     s     GadG H 'AAa1B1C.C,D1A H ("qvv!v}qvv!v}45A H %1A H !B$6yk"BB=HrE   c            Q           e Zd ZU dZee   ed<   ddddddd	d	d
ddddddddddddddddddddddddedddee	f'de
eeeef   f   de
eeeef   f   dededed   dedededededededed ed!ee   d"ed#ed$ed%ed&ed'ed(ee   d)eded*ed+ed,ed-ed.ed/ed0ed1ed2   d3ed4ed5ee   d6ee   d7ee   d8eej*                     d9eej*                     d:dfP fd;Zdcd<Zddd=ed:dfd>Zd?ej*                  d:dfd@Zej6                  j9                         dddAedBed:dfdC       Zej6                  j8                  d:ee   fdD       Zej6                  j8                  dedEed:e ee
ee!f   f   fdF       Z"ej6                  j8                  dfdGed:dfdH       Z#ej6                  j8                  d:ej*                  fdI       Z$dgdedee   d:dfdJZ%	 	 dhdeeeef      deeeef      d:dfdKZ&dLejN                  d:ejN                  fdMZ(	 	 	 	 	 	 	 	 didLejN                  dNee
ee!e   f      dOedPedQedRedSedTedUeejN                     d:e
e!ejN                     eejN                  e!ejN                     f   e ee)f   f   fdVZ*	 	 	 djdNe
ee!e   f   dWedXed:e!e   fdYZ+	 	 	 	 	 dkdLejN                  dZe
ee!e   ee   f   d[edOedPedUeejN                     d:e!ejN                     fd\Z,dgdLejN                  dUeejN                     d:ejN                  fd]Z-dgdLejN                  d^ee   d:ejN                  fd_Z.dedLejN                  d`ed:ejN                  fdaZ/dgdLejN                  dUeejN                     d:ejN                  fdbZ0 xZ1S )lr3   z Vision Transformer

    A PyTorch impl of : `An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale`
        - https://arxiv.org/abs/2010.11929
    dynamic_img_size      r     r         rW   TFNlearnr   rX    img_size
patch_sizein_chansnum_classesglobal_poolr   r   r   r   r   map	embed_dimdepthrY   rZ   r[   r\   r]   r^   r_   r7   class_token	pos_embedno_embed_class
reg_tokenspre_norm
final_normfc_normpool_include_prefixdynamic_img_pad	drop_ratepos_drop_ratepatch_drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)skipjaxjax_nlhbmocor   fix_initembed_layerembed_norm_layerrd   rc   block_fnre   r9   c(                 p   t         0|           |dv sJ |s|dk7  sJ |dv sJ ||dv n|}(t        |$      xs t        }$t        |#      }#t	        |%      xs t
        j                  }%|| _        || _        |x| _	        x| _
        | _        |rdnd| _        | xj                  |z  c_        || _        || _        || _        || _        || _        d| _        i })|r|)j'                  t)        dd	
             |#|#|)d<    |"d||||| |d|)| _        | j*                  j,                  }*t/        | j*                  d      r| j*                  j1                         n|}+|r*t        j2                  t5        j6                  dd|            nd| _        |r*t        j2                  t5        j6                  d||            nd| _        |r|*n|*| j                  z   },|r|dk(  rd| _        n2t        j2                  t5        j>                  d|,|      dz        | _        t        j@                  |      | _!        |dkD  rtE        || j                        | _#        nt        jH                         | _#        |r |$|      nt        jH                         | _%        t5        jL                  d||      D -cg c]  }-|-jO                          }.}-t        jP                  tS        |      D /cg c]  }/ |&|||	|
||||||||.|/   |$|%|'       c}/ | _*        tS        |      D /cg c]  }/t)        d|/ ||+       c}/| _+        |r
|(s |$|      nt        jH                         | _,        |dk(  r t[        | j                  ||	|$|%      | _.        nd| _.        |r
|(r |$|      nt        jH                         | _/        t        j@                  |      | _0        |dkD  r t        jb                  | j                  |      nt        jH                         | _2        | dk7  r| jg                  |        |!r| ji                          yyc c}-w c c}/w c c}/w )a  
        Args:
            img_size: Input image size.
            patch_size: Patch size.
            in_chans: Number of image input channels.
            num_classes: Number of classes for classification head.
            global_pool: Type of global pooling for final sequence (default: 'token').
            embed_dim: Transformer embedding dimension.
            depth: Depth of transformer.
            num_heads: Number of attention heads.
            mlp_ratio: Ratio of mlp hidden dim to embedding dim.
            qkv_bias: Enable bias for qkv projections if True.
            init_values: Layer-scale init values (layer-scale enabled if not None).
            class_token: Use class token.
            no_embed_class: Don't include position embeddings for class (or reg) tokens.
            reg_tokens: Number of register tokens.
            pre_norm: Enable norm after embeddings, before transformer blocks (standard in CLIP ViT).
            final_norm: Enable norm after transformer blocks, before head (standard in most ViT).
            fc_norm: Move final norm after pool (instead of before), if None, enabled when global_pool == 'avg'.
            drop_rate: Head dropout rate.
            pos_drop_rate: Position embedding dropout rate.
            attn_drop_rate: Attention dropout rate.
            drop_path_rate: Stochastic depth rate.
            weight_init: Weight initialization scheme.
            fix_init: Apply weight initialization fix (scaling w/ layer index).
            embed_layer: Patch embedding layer.
            embed_norm_layer: Normalization layer to use / override in patch embed module.
            norm_layer: Normalization layer.
            act_layer: MLP activation layer.
            block_fn: Transformer block layer.
        r   r   )r   noner   N)r   r   r   r)   r   FNHWC)strict_img_size
output_fmtrd   )r   r   r   r   rm   r   
feat_ratior
  {Gz?)r   )r   )r6   rY   rZ   r[   r\   r]   r^   r_   r7   r`   ra   rb   rd   rc   re   blocks.)modulenum_chs	reductionr   )rY   rZ   rd   rc   r   )5r;   r<   r&   r   r%   r=   r   r   r   num_featureshead_hidden_sizer   r   num_reg_tokenshas_class_tokenr   r   r   grad_checkpointingupdatedictpatch_embednum_patcheshasattrr  r>   r?   r   	cls_token	reg_tokenr   randnr   pos_dropr   
patch_droprq   norm_prelinspaceitemr   r   blocksfeature_infor   r   	attn_poolr   	head_dropr   headr   fix_init_weight)1rB   r   r   r   r   r   r   r   rY   rZ   r[   r\   r]   r^   r_   r7   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  rd   rc   r  re   use_fc_norm
embed_argsr  r  	embed_lenrF   dprirC   s1                                                   rD   r<   zVisionTransformer.__init__  s   R 	JJJJkW4441111AHk%==V]#J/<9
)*:;!),7	&&ENNND1DN&1q*,(*,#6  0"'
d5VLM''7J|$& 
!+
 
 &&225<T=M=M|5\D$$//1bl	GRekk!Q	&BCX\PZekk!Z&KL`d#1K{TE[E[7[	I/!DN\\%++aI*NQT*TUDN

]3Q*"&"8"8DO
 !kkmDO19
9-r{{}!&>5!IJAqvvxJJmm$ 5\%&#$ # ##! /-#'((a&%##&# $( Y^^cXdfSTD'!yINf-7Jy)QSQ\Q\Q^	 %0##%#DN "DN0:{z),PRP[P[P]I.>IAoBIIdnnk:SUS^S^S`	& k*  " S K&#&fs   %P)!P.P3c                    d }t        | j                        D ]m  \  }} ||j                  j                  j                  j
                  |dz           ||j                  j                  j                  j
                  |dz          o y)z9Apply weight initialization fix (scaling w/ layer index).c                 R    | j                  t        j                  d|z               y )Ng       @)div_mathsqrt)param	_layer_ids     rD   rescalez2VisionTransformer.fix_init_weight.<locals>.rescales  s    JJtyyy12rE   r)   N)	enumerater'  rp   projr   dataru   fc2)rB   r9  layer_idlayers       rD   r,  z!VisionTransformer.fix_init_weightq  si    	3  )5 	=OHeEJJOO**//A>EIIMM((--x!|<	=rE   modec                    |dv sJ d|v r t        j                  | j                         nd}| j                  t	        | j                  d       | j
                  +t        j                  j                  | j
                  d       | j                  +t        j                  j                  | j                  d       t        t        ||      |        y)zInitialize model weights.

        Args:
            mode: Weight initialization mode ('jax', 'jax_nlhb', 'moco', or '').
        )r  r  r  r   nlhbrX   Nr  stdư>)r5  logr   r   r    r  r=   r   normal_r   r,   get_init_weights_vit)rB   r@  	head_biass      rD   r   zVisionTransformer.init_weightsz  s     666639T>TXXd..//r	>>%$..c2>>%GGOODNNO5>>%GGOODNNO5(y94@rE   mc                     t        |       y)z>Initialize weights for a single module (compatibility method).N)init_weights_vit_timm)rB   rJ  s     rD   _init_weightszVisionTransformer._init_weights  s     	a rE   checkpoint_pathprefixc                     t        | ||       y)zLoad pretrained weights.

        Args:
            checkpoint_path: Path to checkpoint.
            prefix: Prefix for state dict keys.
        N)_load_weights)rB   rN  rO  s      rD   load_pretrainedz!VisionTransformer.load_pretrained  s     	dOV4rE   c                 
    h dS )z3Set of parameters that should not use weight decay.>   r  r   
dist_tokenr  r   s    rD   no_weight_decayz!VisionTransformer.no_weight_decay  s
     87rE   coarsec                      t        dddg      S )zCreate regex patterns for parameter grouping.

        Args:
            coarse: Use coarse grouping.

        Returns:
            Dictionary mapping group names to regex patterns.
        z ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr'  )r  )rB   rV  s     rD   group_matcherzVisionTransformer.group_matcher  s     4-/CD
 	
rE   enablec                 v    || _         t        | j                  d      r| j                  j                  |       yy)zEnable or disable gradient checkpointing.

        Args:
            enable: Whether to enable gradient checkpointing.
        set_grad_checkpointingN)r  r  r  r\  )rB   rZ  s     rD   r\  z(VisionTransformer.set_grad_checkpointing  s6     #)4##%=>33F; ?rE   c                     | j                   S )zGet the classifier head.)r+  r   s    rD   get_classifierz VisionTransformer.get_classifier  s     yyrE   c                    || _         |=|dv sJ |dk(  r| j                  J d       |dk7  r| j                  d| _        || _        |dkD  r&t        j                  | j
                  |      | _        yt        j                         | _        y)zReset the classifier head.

        Args:
            num_classes: Number of classes for new classifier.
            global_pool: Global pooling type.
        Nr   r   z=Cannot currently add attention pooling in reset_classifier().r   )r   r)  r   r=   r   r   rq   r+  )rB   r   r   s      rD   reset_classifierz"VisionTransformer.reset_classifier  s     '""NNNNe#(>]]]u%$..*D!%*D>IAoBIIdnnk:	SUS^S^S`	rE   c           	         | j                   j                  }| j                   j                  ||       | j                  | j                  rdn| j
                  }| j                   j                  |z   }|| j                  j                  d   k7  rGt        j                  t        | j                  | j                   j                  ||d            | _        yyy)zUpdate the input image resolution and patch size.

        Args:
            img_size: New input resolution, if None current resolution is used.
            patch_size: New patch size, if None existing patch size is used.
        )r   r   Nr   r)   T)new_sizeold_sizer   verbose)r  	grid_sizeset_input_sizer   r   r   r  r   r=   r>   r#   )rB   r   r   prev_grid_sizer   num_new_tokenss         rD   rf  z VisionTransformer.set_input_size  s     ))33''j'Q>>%%)%8%8d>T>T!--99<MMN!5!5a!88!#.DNN!--77+&7 / " 9 &rE   rF   c                 \   | j                   -|j                  |j                  d   d|j                  d         S | j                  rn|j                  \  }}}}| j                  j
                  }t        | j                   ||f|| j                  rdn| j                        }|j                  |d|      }n| j                   }g }| j                  9|j                  | j                  j                  |j                  d   dd             | j                  9|j                  | j                  j                  |j                  d   dd             | j                  r#||z   }|r>t        j                  ||gz   d      }n"|rt        j                  ||gz   d      }||z   }| j                  |      S )z$Apply positional embedding to input.r   r   )rb  rc  r   r)   r   )r   r   r   r   r  re  r#   r   r   r  r   expandr   r?   r   r"  )	rB   rF   r   HWr   rg  r   to_cats	            rD   
_pos_embedzVisionTransformer._pos_embed  sl   >>!66!''!*b!''"+66  JAq!Q!--77N.Q''+':':!@V@V	I q"a AI>>%MM$..//
BCD>>%MM$..//
BCD IAIIfsl2 IIfsl2IA}}QrE   indicesreturn_prefix_tokensr   
stop_earlyr  intermediates_onlyoutput_dictry   c
           	      $   |dv sJ d       |dk(  }
g }t        t        | j                        |      \  }}|j                  \  }}}}| j	                  |      }| j                  |      }| j                  |      }| j                  |      }t        j                  j                         s|s| j                  }n| j                  d|dz    }t        |      D ]z  \  }}|	 |||	      }n?| j                  r+t        j                  j                         st        ||      }n ||      }||v sW|j                  |r| j                  |      n|       | | j                   rE|D cg c]  }|ddd| j                   f    }}|D cg c]  }|dd| j                   df    }}nd}|
ra| j                  j#                  ||f      \  }}|D cg c]6  }|j%                  |||d      j'                  dd	dd
      j)                         8 }}|r*i }||d<   ||r||d<   |s| j                  |      }||d<   |S t        j                  j                         s|r|t+        t-        ||            }|r|S | j                  |      }||fS c c}w c c}w c c}w )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            return_prefix_tokens: Return both prefix and spatial intermediate tokens
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
            output_dict: Return outputs as a dictionary with 'image_features' and 'image_intermediates' keys
            attn_mask: Optional attention mask for masked attention (e.g., for NaFlex)
        Returns:
            A tuple with (final_features, intermediates), a list of intermediate features, or a dictionary containing
            'image_features' and 'image_intermediates' (and optionally 'image_intermediates_prefix')
        )NCHWNLCz)Output format must be one of NCHW or NLC.ru  Nr)   r|   r   r   r   r   image_intermediatesimage_intermediates_prefiximage_features)r+   lenr'  r   r  rn  r#  r$  r?   jitis_scriptingr:  r  r-   r   r   r   dynamic_feat_sizer   permute
contiguouslistzip)rB   rF   ro  rp  r   rq  r  rr  rs  ry   r   intermediatestake_indices	max_indexr   r   heightwidthr'  r1  blkr   prefix_tokensrk  rl  result_dictx_finals                              rD   forward_intermediatesz'VisionTransformer.forward_intermediates  s   8 _,Y.YY,&"6s4;;7G"Qi  gg1feQOOAOOAMM!99!!#:[[F[[)a-0F' 		BFAs$Y/((1G1G1IsA&FL $$TTYYq\qA		B !!ERSQq!D$:$:"::;SMSDQRqQq$"8"8"99:RMR M##55vuoFDAq^klYZQYYq!Q3;;Aq!QGRRTlMl K1>K-.(-A<I89 &))A,07,- yy%%',@]E^ ]M!BCM  IIaL-G TR ms   !JJ
;J
prune_norm
prune_headc                    t        t        | j                        |      \  }}| j                  d|dz    | _        |rt        j                         | _        |r+t        j                         | _        | j                  dd       |S )aE  Prune layers not required for specified intermediates.

        Args:
            indices: Indices of intermediate layers to keep.
            prune_norm: Whether to prune normalization layer.
            prune_head: Whether to prune the classifier head.

        Returns:
            List of indices that were kept.
        Nr)   r   r   )r+   rz  r'  r=   rq   r   r   r`  )rB   ro  r  r  r  r  s         rD   prune_intermediate_layersz+VisionTransformer.prune_intermediate_layersm  sj      #7s4;;7G"Qikk.9q=1DI;;=DL!!!R(rE   nr   c           	      :    | j                  |||||rdndd|      S )a  Get intermediate layer outputs (DINO interface compatibility).

        NOTE: This API is for backwards compat, favour using forward_intermediates() directly.

        Args:
            x: Input tensor.
            n: Number or indices of layers.
            reshape: Reshape to NCHW format.
            return_prefix_tokens: Return prefix tokens.
            norm: Apply normalization.

        Returns:
            List of intermediate features.
        ru  rv  T)rp  r   r  rr  ry   )r  )rB   rF   r  r   rp  r   ry   s          rD   get_intermediate_layersz)VisionTransformer.get_intermediate_layers  s4    . ))q!5!(ve# * 
 	
rE   c                    | j                  |      }| j                  |      }| j                  |      }| j                  |      }|| j                  D ]  } |||      } nR| j
                  r5t        j                  j                         st        | j                  |      }n| j	                  |      }| j                  |      }|S )z\Forward pass through feature layers (embeddings, transformer blocks, post-transformer norm).r|   )r  rn  r#  r$  r'  r  r?   r{  r|  r.   r   )rB   rF   ry   r  s       rD   forward_featuresz"VisionTransformer.forward_features  s    QOOAOOAMM! {{ 0Y/0$$UYY-C-C-Et{{A.AAAIIaLrE   r   c                     | j                   4| j                  s|dd| j                  df   }| j                  |      }|S || j                  n|}t	        ||| j                  | j                        }|S )zApply pooling to feature tokens.

        Args:
            x: Feature tensor.
            pool_type: Pooling type override.

        Returns:
            Pooled features.
        N)r   r   r   )r)  r   r   r   r   )rB   rF   r   s      rD   poolzVisionTransformer.pool  s}     >>%++a//001q!AH(1(9D$$y	"44"&":":	
 rE   
pre_logitsc                     | j                  |      }| j                  |      }| j                  |      }|r|S | j                  |      S )zForward pass through classifier head.

        Args:
            x: Feature tensor.
            pre_logits: Return features before final classifier.

        Returns:
            Output tensor.
        )r  r   r*  r+  )rB   rF   r  s      rD   forward_headzVisionTransformer.forward_head  sA     IIaLLLONN1q0DIIaL0rE   c                 N    | j                  ||      }| j                  |      }|S r{   )r  r  r}   s      rD   rI   zVisionTransformer.forward  s,    !!!y!9a rE   r   r   F)Tr~   )NN)NFFFru  FFN)r)   FT)r)   FFFN)2rK   rL   rM   rN   r   rQ   r   r   rV   r   r   rO   r
   r   rP   r   strr   r(   r   r=   r   r<   r,  r   rM  r?   r{  ignorerR  r	   rU  r   r   rY  r\  r^  r`  rf  rR   rn  r   r  r  r  r  r  r  rI   rS   rT   s   @rD   r3   r3     s   
 Dk! 5868#OV !!!$)#("+/ $$#("#&*(-%*$)!#%%'$&$&$&JL"$.48.2-1(-),Qk#CsCx01k# c5c?23k# 	k#
 k# !!KLk# k# k# k# k# k# k# "k# !k# k#  "%!k#" #k#$ %k#& !'k#( )k#* +k#, -k#. d^/k#0 "&1k#2 #3k#4 "5k#6 7k#8 !9k#: #;k#< "=k#> "?k#@ "Ak#B !!FGCk#D Ek#F "Gk#H 'y1Ik#J !+Kk#L  	*Mk#N 299oOk#P BIIQk#R 
Sk#Z=A Ad A !ryy !T !
 YY5s 5C 5 5 5 YY8S 8 8 YY
D 
T#uS$Y?O:O5P 
 
 YY<T <T < < YY		  aC ahsm aW[ a& 3748uS#X/ !sCx1 
	2% ELL % U\\ % T 8<).$$', %04] ||]  eCcN34]  #'	] 
 ]  ]  ]  !%]  ]   -]  
tELL!5tELL7I)I#JDQTVYQYNZ	[] B ./$#	3S	>*  	
 
c8 45!).04
||
 S$s)U3Z/0
 	

 #'
 
  -
 
ell	
@%,, 8ELL;Q ]b]i]i &ell x}  01ell 1 1 1 (5<<2H TYT`T` rE   r   r  namer9   c                    t        | t        j                        rNt        | j                  d       | j
                  *t        j                  j                  | j
                         yyt        | d      r| j                          yy)zViT weight initialization, original timm impl (for reproducibility).

    Args:
        module: Module to initialize.
        name: Module name for context.
    r  rC  Nr   )

isinstancer=   r   r    r   rm   r   zeros_r  r   )r  r  s     rD   rL  rL    s^     &"))$fmm-;;"GGNN6;;' #		( 
)rE   rI  c                 &   t        | t        j                        r|j                  d      rTt        j                  j                  | j                         t        j                  j                  | j                  |       yt        j                  j                  | j                         | j                  Yd|v r+t        j                  j                  | j                  d      n(t        j                  j                  | j                         yyt        | t        j                        rLt        | j                         | j                  *t        j                  j                  | j                         yyt        | d      r| j                          yy)zViT weight initialization, matching JAX (Flax) impl.

    Args:
        module: Module to initialize.
        name: Module name for context.
        head_bias: Bias value for head layer.
    r+  Nru   rE  rC  r   )r  r=   r   
startswithr   r  r   r   rm   xavier_uniform_rG  Conv2dr!   r  r   )r  r  rI  s      rD   init_weights_vit_jaxr    s     &"))$??6"GGNN6==)GGfkk95GG##FMM2{{&:?4-6RWW^^\b\g\gMh '	FBII	&fmm$;;"GGNN6;;' #		( 
)rE   c                 >   t        | t        j                        rd|v rt        j                  dt        | j                  j                  d   dz  | j                  j                  d   z         z        }t        j                  j                  | j                  | |       n)t        j                  j                  | j                         | j                  *t        j                  j                  | j                         yyt        | d      r| j                          yy)zViT weight initialization, matching moco-v3 impl minus fixed PatchEmbed.

    Args:
        module: Module to initialize.
        name: Module name for context.
    qkvg      @r   r   r)   Nr   )r  r=   r   r5  r6  rP   r   r   r   uniform_r  rm   r  r  r   )r  r  vals      rD   init_weights_vit_mocor    s     &"))$D=))Bv}}':':1'='BV]]EXEXYZE['[!\\]CGGV]]SD#6GG##FMM2;;"GGNN6;;' #		( 
)rE   r@  c                 L    d| v rt        t        |      S d| v rt        S t        S )Nr  )rI  r  )r   r  r  rL  )r@  rI  s     rD   rH  rH  !  s*    }+yAA	4$$$$rE   r  posemb
posemb_newgs_newinterpolation	antialiasc           	         |j                   d   |z
  }| j                   d   |z
  }t        t        j                  |            gdz  }t	        |      s"t        t        j                  |            gdz  }t        | |||||d      S )z Rescale the grid of position embeddings when loading from state_dict.
    *DEPRECATED* This function is being deprecated in favour of using resample_abs_pos_embed
    r)   r   T)r   r  r  rd  )r   rO   r5  r6  rz  r#   )	r  r  r   r  r  r  ntok_newntok_oldgs_olds	            rD   resize_pos_embedr  *  s     "%66H||A!22H$))H%&'!+Fv;dii)*+a/!+# rE   modelrN  rO  load_bfloat16c                    ddl r
ddlm  ddldIfd	}rj                  |      }nj                  |      }d}d}d}|sd|v rd}nd	|v rd
}d}nd|v rd}d}t	        | j
                  d      r| j
                  j                  }	t	        |	d       }
|
r|	n|	j                  }|j                  j                  j                  t        |j                  j                  j                  d    ||| d                      |j                  j                  j                   ||| d                |j                  j                  j                   ||| d                |
st        |	j                         D ]  \  }}t        |j"                        D ]  \  }}| d|dz    d|dz    d}t%        d      D ]  }t'        |d|dz          j                  j                   ||| d|dz    d                t'        |d|dz          j                  j                   ||| d|dz    d                t'        |d|dz          j                  j                   ||| d|dz    d                 |j(                  |j(                  j                  j                  j                   ||| d                |j(                  j                  j                  j                   ||| d                |j(                  j                  j                  j                   ||| d                   ||| d          }nCt        | j
                  j*                  j                  j                  d    ||| d                }|j                  d!d | j
                  j*                  j                  j                  d!d k7  r;t-        || j
                  j*                  j                  j                  d!d ||d"      }| j
                  j*                  j                  j                  |       | j
                  j*                  j                  j                   ||| d#                | j.                  )| j.                  j                   ||| d$   d%             |r ||| d&   d%      }n ||| d'   d%      }|j                  | j0                  j                  k7  rAt'        | d(d      rdnt'        | d)d      }t3        || j
                  j4                  |||d*      }| j0                  j                  |       | j                  j                  j                   ||| d+                | j                  j                  j                   ||| d,                t7        | j8                  t:        j<                        r| d-|v r| j8                  j                  j                  d   || d-   j                  d.   k(  rb| j8                  j                  j                   ||| d/                | j8                  j                  j                   ||| d-                | j>                  n| d0}|d1z   }| j>                  j@                  j                   ||| d2   d%             | j>                  jB                  j                  j                  tE        jF                  d3D cg c]-  } ||| | d   d%      jI                  d      jJ                  / c}             | j>                  jB                  j                  j                  tE        jF                  d3D cg c]#  } ||| | d   d%      jM                  d.      % c}             | j>                  jN                  j                  j                   ||| d4   d%      jI                  d      jJ                         | j>                  jN                  j                  j                   ||| d5   d%      jM                  d.             | j>                  j*                  j                  j                   ||| d6         jI                  d             | j>                  j*                  j                  j                   ||| d7                | j>                  j                  j                  j                   ||| d8                | j>                  j                  j                  j                   ||| d9                t%        d:      D ]  }t'        | j>                  jP                  d;|dz          j                  j                   ||| d<| d                t'        | j>                  jP                  d;|dz          j                  j                   ||| d<| d                 |rd=nd>\  }}}t        | j"                  jS                               D ]  \  }}| d?|v r| d@}|}n
| dA| d}d}|dB| dz   }|jT                  j                  j                   ||| d8   |C             |jT                  j                  j                   ||| d9   |C             |jV                  jX                  j                  j                  tE        jF                  dDD cg c].  } ||| | d   d|E      jI                  d      jJ                  0 c}             |jV                  jX                  j                  j                  tE        jF                  dDD cg c]$  } ||| | d   d|E      jM                  d.      & c}             |jV                  j*                  j                  j                   ||| d6   |C      jI                  d             |jV                  j*                  j                  j                   ||| d7   |C             |jZ                  j                  j                   ||| dF| d   |C             |jZ                  j                  j                   ||| dF| d   |C             t%        d:      D ]  }t'        |jP                  d;|dz          j                  j                   ||| dG| dH| d   |C             t'        |jP                  d;|dz          j                  j                   ||| dG| dH| d   |C               yc c}w c c}w c c}w c c}w )JzV Load weights from .npz checkpoints for official Google Brain Flax implementation
    r   NTc                 V   || |   } rE| j                  j                        j                  j                        } j	                  |       } | j
                  dk(  rK| j                  d   | j                  d   cxk(  r| j                  d   cxk(  rdk(  rn n| j                         } |rh| j
                  dk(  r| j                  g d      } nE| j
                  dk(  r| j                  g d      } n"| j
                  dk(  r| j                  ddg      } t        j                  |       } | S )N   r   r)   r   )r   r   r   r)   r   )r   r   r)   )r   bfloat16astypefloat32arrayndimr   flattenr   r?   
from_numpy)_wtidxjnpr  	ml_dtypesnps      rD   _n2pz_load_weights.<locals>._n2pL  s    ?CB++,33CKK@B"B77a<BHHQK288A;J"((1+JJBww!|\\,/A\\),A\\1a&)b!	rE   bilinearFzopt/target/embedding/kernelzopt/target/zparams/embedding/kernelzparams/zparams/img/embedding/kernelzparams/img/backbonerX  r)   zconv_root/kernelzgn_root/scalezgn_root/biasblockz/unit/r   convz/kernelr   gnz/scalez/biaszconv_proj/kernelzgn_proj/scalezgn_proj/biaszembedding/kernelr   r  r  rd  zembedding/biascls)r  pos_embeddingz(Transformer/posembed_input/pos_embeddingr   r   rb  r   r  r  rd  zTransformer/encoder_norm/scalezTransformer/encoder_norm/biasz	head/biasr   zhead/kernelz
MAPHead_0/zMultiHeadDotProductAttention_0/probe)keyvaluezquery/kernelz
query/biasz
out/kernelzout/biaszLayerNorm_0/scalezLayerNorm_0/biasr   fczMlpBlock_0/Dense_)r   r   r)   )r)   r   r   z*Transformer/encoderblock/LayerNorm_0/scalezTransformer/encoderblock/zTransformer/encoderblock_MultiHeadDotProductAttention_)r  )queryr  r  )r  r  
LayerNorm_	MlpBlock_z/Dense_)TN).numpy	jax.numpyr  loadr  r  r  rX  r  r   copy_r/   r   r   rm   r:  stagesr'  r   getattr
downsampler;  r"   r  r   r#   re  r  r+  r=   r   r)  latentkvr?   r   r  Tr   r   ru   childrenro   rp   r  rt   )r  rN  rO  r  r  wr  r  
big_visionr  	stem_onlyrX  r1  stagejr  bprembed_conv_wpos_embed_wr   block_prefix
mha_prefixr  mha_subb_subln1_subr  r  r  r  s      `                        @@@rD   rQ  rQ  C  si     * HH_%GGO$MIJ(A-"F&!+FJ*a/"FJu  *-$$--&11	$x(--		/		0@0@0F0Fq0I4PQU[T\\lRmPnKopq		tA&>$?@A		T!vhl$;"<=>%hoo6 
W5 )%,, 7 	WHAu"85QuQUG1=B"1X ba!eW~6==CCDbTQUVWZ[V[U\\cKdIeDfga!eW~6==CCDbTQSTUXYTYSZZ`KaIbDcda!eW~6;;AA$qB4rRSVWRWQXX]I^G_B`ab ''3((--44::4RDHXBY@Z;[\((--44::4RDBV@W;XY((--2288a2$l@S>T9UV	W
W A(89:;'""))//2DfXEU;V9W4XZ"#%"3"3"8"8"?"?"E"Ebc"JJ+""))//4'
 
!!''5	%%d1x~-F+G&HI"d1xs^#4>?1x}56%@1x'OPQUZ[EOO111!(0@%!HAgV[]prsNt,&&00/'
 
OO+&	JJDfX-K#L!MNO	JJOO$qF8+H!IJKL5::ryy)hi A%JJOO!!!$VHI*>(?(E(Eb(II

Q&'=%> ?@

d1xy%9#:;<
 " ,!&EF
$$T!|nE,B*Cu%MN!!''		N^3`IJDj\!G,-7??BDD3` )a 	b%%eiiK[1]FGDj\!E*+u5==bA1] '^ 	_  &&tAL.I,Je'T'\'\]^'_'a'ab$$T!zl*,E*F%%P%X%XY[%\]##))$qJ<z1J/K*L*T*TUV*WX!!''Q*X/F-G(HI##))$qL>AR1S/T*UV!!''Q,?O/P-Q(RSq 	wAEOO''2a!eW6==CCDl^[lmnloovKwIxDyzEOO''2a!eW6;;AA$qL>YjkljmmrIsGtBuv	w ,6i9GUGell3356 T5X?@AE$X%>?LC$X%>qcCLC!&CG9A$NN
  a<.8I(J&KQT!UVtA6F&G$HcRS

##EIIWp/rRSDj\!G,-C@HHKMM/r %s 	t

!!%))Tm-oOPDj\!E*+u#>FFrJ-o #p 	q

$$T!zl*,E*FC%P%X%XYZ%[\

""4ZL*A(B#LM  a<.
7)6(R&SY\!]^tAj	&O$PVYZ[q 	TAEIIAE7|,3399Q,ywqcIJPSTVEIIAE7|,1177Q,ywqcGHcRT	T%T3`1]./r-os   ;2t14(t6(3t;")u 
state_dictc                    i }g d}| j                         D ]  \  }}|j                  |      s|j                  |d      }|D ]  }|j                  |d   |d         } |dk(  r:d}|j                  dd      }t	        j
                  |j                  d         |d<   n>|dk(  r#d	}|j                  d      j                  d      }n|d
k(  r|j                  d      }|||<    |S )N)
)conv1patch_embed.proj)positional_embeddingr   )ztransformer.resblocks.r  )ln_prer$  )ln_postr   )ln_r   )in_proj_zqkv.out_projr;  )zmlp.c_fcmlp.fc1)z
mlp.c_projzmlp.fc2r   r   r)   r;  head.weight	head.biasclass_embeddingr  r   )itemsr  replacer   r?   r   r   	unsqueeze)r  r  rO  out_dictswapsr   r   sps           rD   _convert_openai_clipr    s    
 HE   " 1||F#IIfb! 	(B		"Q%A'A	( ;AAq!A$)KK
$;H[!##AA((+A+AA!" OrE   c                    dd l }i }| j                  dd        d| v rR| j                  d      |d<   | j                  d      | d   d d df   z   |d<   | j                  d      d d dd f   |d<   | j                         D ]Z  \  }}|j                  d|      r|||j	                  d	d
      <   .|j                  d|      r|||j	                  dd      <   V|||<   \ |S )Nr   
mask_tokenregister_tokensr   r  r   r)   z(blocks\.(\d+)\.mlp\.w12\.(?:weight|bias)w12fc1z'blocks\.(\d+)\.mlp\.w3\.(?:weight|bias)w3r=  )repopr  matchr  )r  r  r  r  r   r   s         rD   _convert_dinov2r    s     HNN<&J& */@ A *{ ;j>UVWYZVZ>[ [ *{ ;AqrE B  " 188?C01HQYYue,-XX@!D/0HQYYtU+, OrE   c                 d   i }| j                         D ]  \  }}|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  d	d
      }|j                  dd      }|j                  dd      }|j                  dd      }|||<    |S )Nnorm_1ro   norm_2rt   zpreprocessor.patchifier.patch_embed.zpreprocessor.pos_embedr   ztrunk.r   zpost_trunk_norm.norm.r  z	mlp.fc1_gzmlp.fc3z	mlp.fc1_x)r  r  )r  r  r  r   r   s        rD   _convert_aimv2r    s     H  " 	1IIh(IIh(II0.AII.<IIh#II('2IIi-IIi-	 OrE   c                    ddl }| j                  d|       } dD ]  }| j                  |d        g d}i }| j                         D ]I  \  }}d|v r|D ]  \  }}|j	                  |||      } |dk(  r|dd j                  d      |d	<   E|||<   K i i }
}	|j                  d
      }|j                         D ]  \  }}|j                  |      }|s||	|<   |j                         \  }}}|
j                  ||fi       }|||<   t        |      dk(  s[t        j                  |d   |d   |d   gd      |	d| d| <    |	S )zP
    Turn a BEiT-3 checkpoint into a standard VisionTransformer state-dict.
    r   Nr  )zbeit3.text_embed.weightzbeit3.vision_embed.mask_token))zbeit3\.r   )zvision_embed\.cls_tokenr  )zvision_embed\.r  )zembed_positions\.z
pos_embed.)z	encoder\.r   )zlayers\.r  )zffn_layernorm\.r  )zffn\.zmlp.)zself_attn_layer_norm\.znorm1.)zself_attn\.zattn.)zfinal_layer_norm\.znorm2.)inner_attn_lnr   r  )z\.A\..z.B.zpos_embed.weightr   r   z1blocks\.(\d+)\.attn\.(q|k|v)_proj\.(weight|bias)$r   r   r   r   r   r  z
.attn.qkv.)r  getr  r  subr  compile	fullmatchgroups
setdefaultrz  r?   r   )r  r  r  r   rulestmpr   oldnewoutbufpatrJ  r  whichkindstashs                    rD   _convert_beit3r/  -  s    4J J  q$ E  C  " 	1A: 	$HCsC#A	$"" uq1CCF	 2C
**I
JC		 1MM!CF88:UDT{B/eu:?3899sU3Zs4!4C'#j/0 JrE   Tadapt_layer_scalec           	         ddl }i }| j                  d|       } | j                  d|       } d}d| v rt        | |      } nd| v rt        | |d	      } nd
| v rt        | |      } nt	        d | j                         D              rt        | |      } nd| v r| d   } d}nsd| v sd| v r[d}d| v ret        |j                  t        j                        rA| d   |d<   t        j                  | d   j                  d         |d<   nd| v rt        | |      } |rA| j                         D 	ci c]&  \  }}	|j!                  |      s|t#        |      d |	( } }}	| j                         D ]`  \  }}	d|v r|j$                  j&                  j(                  j                  \  }
}}}t#        |	j                        dk  rC|j$                  j&                  j(                  j                  \  }
}}}|	j+                  |
d||      }	|	j                  d   |k7  s|	j                  d   |k7  rt-        |	||f||d      }	n|dk(  rk|	j                  d   |j.                  j                  d   k7  rBt1        |dd      rdnt1        |dd      }t3        |	|j$                  j4                  |||d       }	n |rd!|v r|j7                  d"d#|      }nd$|v r\|	||<   c |S c c}	}w )%zJ convert patch embedding weight from manual patchify + linear proj to convr   Nr  r  r   zvisual.class_embeddingzmodule.visual.class_embeddingzmodule.visual.)rO  r  c              3   $   K   | ]  }d |v  
 yw)zbeit3.Nr  ).0r   s     rD   	<genexpr>z'checkpoint_filter_fn.<locals>.<genexpr>}  s     6qX]6s   encoderzmodule.zvisual.trunk.pos_embedz"visual.trunk.blocks.0.norm1.weightzvisual.trunk.zvisual.head.proj.weightr  r  z#preprocessor.patchifier.proj.weightzpatch_embed.proj.weightr  r   r   Tr  r   r)   r   Fr   r  gamma_zgamma_([0-9])z
ls\1.gammar  )r  r  r  r  anykeysr/  r  r+  r=   r   r?   r   r   r  r  r  rz  r  r;  r   r   r"   r   r  r#   re  r   )r  r  r0  r  r  r  r  rO  r   r   OIrk  rl  r   s                  rD   checkpoint_filter_fnr;  i  s    H4Jj9JF:-)*e<
	(J	6)*eDTU
		#$Z7
	6JOO$56	6#J6
	j	 	*
	!Z	/3W[e3e $
2z%**bii7X&01J&KH]#$)KK
;T0U0[0[\]0^$_H[!	.*	<#J6
5?5E5E5G`TQ1<<X^K_aFoq(`
`  "  1$)**//66<<JAq!Q177|a"..33::@@
1aIIaQ*wwr{a1772;!#3(F"/'  +!''!*0E0Ea0H"H%,U4De%LRYZ_atvwRx&**44"3+#A 8q=':AQA B OG as   #K=Kurlc                 2    | ddd dddt         t        ddd|S )	Nr   )r   r   r   g?bicubicTr  r+  )r<  r   
input_size	pool_sizecrop_pctr  fixed_input_sizer   rD  
first_conv
classifier)r   r   )r<  kwargss     rD   _cfgrF    s7    #" '%(  rE   z*vit_base_patch16_224.augreg2_in21k_ft_in1kztimm/)	hf_hub_idz*vit_base_patch16_384.augreg2_in21k_ft_in1kz)vit_base_patch8_224.augreg2_in21k_ft_in1kz)vit_tiny_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npz)r<  rG  custom_loadz)vit_tiny_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz)r     rI        ?)r<  rG  rH  r?  rA  z*vit_small_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz*vit_small_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz*vit_small_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch32_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.npzz)vit_base_patch32_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_light1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npzz)vit_base_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz)vit_base_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz(vit_base_patch8_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_224.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz*vit_large_patch16_384.augreg_in21k_ft_in1kzhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz'vit_base_patch16_224.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth)r<  rG  z'vit_base_patch16_384.orig_in21k_ft_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_384-83fb41ba.pth)r<  rG  r?  rA  z(vit_large_patch32_384.orig_in21k_ft_in1kzphttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p32_384-9b920ba8.pthz!vit_small_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.npzz!vit_small_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/S_16-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch32_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch32_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_32-i1k-300ep-lr_0.001-aug_medium2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzz vit_base_patch16_224.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.npzz vit_base_patch16_384.augreg_in1kzhttps://storage.googleapis.com/vit_models/augreg/B_16-i1k-300ep-lr_0.001-aug_strong2-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npzzvit_large_patch14_224.untrained)r<  zvit_huge_patch14_224.untrainedzvit_giant_patch14_224.untrainedz"vit_gigantic_patch14_224.untrainedzvit_base_patch32_224.orig_in21k)rG  r   zvit_base_patch16_224.orig_in21kz vit_large_patch32_224.orig_in21kz vit_large_patch16_224.orig_in21kzvit_huge_patch14_224.orig_in21kz!vit_tiny_patch16_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0.npziSU  )r<  rG  rH  r   z"vit_small_patch32_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_32-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz"vit_small_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch32_224.augreg_in21kzohttps://storage.googleapis.com/vit_models/augreg/B_32-i21k-300ep-lr_0.001-aug_medium1-wd_0.03-do_0.0-sd_0.0.npzz!vit_base_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz vit_base_patch8_224.augreg_in21kzmhttps://storage.googleapis.com/vit_models/augreg/B_8-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0.npzz"vit_large_patch16_224.augreg_in21kznhttps://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1.npzzvit_base_patch32_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_32.npz)r<  rH  rG  zvit_base_patch16_224.sam_in1kz:https://storage.googleapis.com/vit_models/sam/ViT-B_16.npzzvit_small_patch16_224.dinoz[https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth)r<  rG  r   rD  r   zvit_small_patch8_224.dinozYhttps://dl.fbaipublicfiles.com/dino/dino_deitsmall8_pretrain/dino_deitsmall8_pretrain.pthzvit_base_patch16_224.dinozWhttps://dl.fbaipublicfiles.com/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pthzvit_base_patch8_224.dinozUhttps://dl.fbaipublicfiles.com/dino/dino_vitbase8_pretrain/dino_vitbase8_pretrain.pthz vit_small_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pthz
apache-2.0)r     rK  )r<  rG  licenser   rD  r   r?  rA  zvit_base_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pthz vit_large_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_pretrain.pthz vit_giant_patch14_dinov2.lvd142mzNhttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_pretrain.pthz%vit_small_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_reg4_pretrain.pthz$vit_base_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_reg4_pretrain.pthz%vit_large_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_reg4_pretrain.pthz%vit_giant_patch14_reg4_dinov2.lvd142mzShttps://dl.fbaipublicfiles.com/dinov2/dinov2_vitg14/dinov2_vitg14_reg4_pretrain.pthzvit_base_patch16_224_miil.in21kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_in21k_miil-887286df.pth)rX   rX   rX   )rJ  rJ  rJ  g      ?r  i+  )r<  rG  r   rD  rA  r  r   z'vit_base_patch16_224_miil.in21k_ft_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/vit_base_patch16_224_1k_miil_84_4-2deb18e3.pth)r<  rG  r   rD  rA  r  z vit_base_patch16_rpn_224.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_base_patch16_rpn_224-sw-3b07e89d.pthz#vit_medium_patch16_gap_240.sw_in12k)r      rM  gffffff?i-.  )rG  r?  rA  r   z+vit_medium_patch16_gap_256.sw_in12k_ft_in1k)r      rN  )rG  r?  rA  z+vit_medium_patch16_gap_384.sw_in12k_ft_in1ksquash)rG  r?  rA  	crop_modevit_base_patch16_gap_224z/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k)rG  r   rD  z/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k)rG  r   rD  rA  r?  z/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k)r     rR  z/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k)rG  r   rD  rA  z/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k)rG  r   rD  rA  r?  rP  z0vit_large_patch14_clip_224.laion2b_ft_in12k_in1kz0vit_large_patch14_clip_336.laion2b_ft_in12k_in1k)r   P  rS  z/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1kz/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1kz.vit_base_patch32_clip_224.openai_ft_in12k_in1k)r   rD  z.vit_base_patch32_clip_384.openai_ft_in12k_in1kz.vit_base_patch16_clip_224.openai_ft_in12k_in1kz.vit_base_patch16_clip_384.openai_ft_in12k_in1kz/vit_large_patch14_clip_224.openai_ft_in12k_in1kz/vit_large_patch14_clip_336.openai_ft_in12k_in1kz)vit_base_patch32_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_224.laion2b_ft_in1kz)vit_base_patch16_clip_384.laion2b_ft_in1kz*vit_large_patch14_clip_224.laion2b_ft_in1kz*vit_large_patch14_clip_336.laion2b_ft_in1kz)vit_huge_patch14_clip_224.laion2b_ft_in1kz)vit_huge_patch14_clip_336.laion2b_ft_in1kz(vit_base_patch32_clip_224.openai_ft_in1kz(vit_base_patch16_clip_224.openai_ft_in1kz(vit_base_patch16_clip_384.openai_ft_in1kz)vit_large_patch14_clip_224.openai_ft_in1kz*vit_base_patch16_clip_224.laion2b_ft_in12k)rG  r   rD  r   z+vit_large_patch14_clip_224.laion2b_ft_in12k)rG  r   rD  rA  r   z*vit_huge_patch14_clip_224.laion2b_ft_in12kz)vit_base_patch16_clip_224.openai_ft_in12kz*vit_large_patch14_clip_224.openai_ft_in12kz!vit_base_patch32_clip_224.laion2b   z!vit_base_patch16_clip_224.laion2bz"vit_large_patch14_clip_224.laion2br   z!vit_huge_patch14_clip_224.laion2b   z"vit_giant_patch14_clip_224.laion2bz%vit_gigantic_patch14_clip_224.laion2b   z'vit_base_patch32_clip_224.laion400m_e32)zDnatively QuickGELU, use quickgelu model variant for original results)rG  notesr   rD  r   z'vit_base_patch16_clip_224.laion400m_e32z,vit_base_patch16_plus_clip_240.laion400m_e32  )rG  r   rD  r?  rA  r   z(vit_large_patch14_clip_224.laion400m_e32z$vit_base_patch32_clip_224.datacompxlz$vit_base_patch32_clip_256.datacompxl)rG  r   rD  rA  r?  r   z$vit_base_patch16_clip_224.datacompxlz%vit_large_patch14_clip_224.datacompxlzvit_base_patch16_clip_224.dfn2bz
apple-ascl)rG  rL  r   rD  rA  r   z%vit_large_patch14_clip_224.dfn2b_s39bz vit_large_patch14_clip_224.dfn2b)rG  rL  rW  r   rD  rA  r   zvit_huge_patch14_clip_224.dfn5bzvit_huge_patch14_clip_378.dfn5b)r   z  rY  )rG  r   rD  rL  rW  rA  r?  r   z(vit_base_patch32_clip_224.metaclip_2pt5bzcc-by-nc-4.0z(vit_base_patch16_clip_224.metaclip_2pt5bz)vit_large_patch14_clip_224.metaclip_2pt5bz(vit_huge_patch14_clip_224.metaclip_2pt5bz-vit_huge_patch14_clip_224.metaclip_altogetherz,vit_gigantic_patch14_clip_224.metaclip_2pt5bz'vit_base_patch32_clip_224.metaclip_400mz'vit_base_patch16_clip_224.metaclip_400mz(vit_large_patch14_clip_224.metaclip_400mz vit_base_patch32_clip_224.openaiz vit_base_patch16_clip_224.openaiz!vit_large_patch14_clip_224.openai)rG  rW  r   rD  rA  r   z!vit_large_patch14_clip_336.openai)rG  rW  r   rD  rA  r?  r   z#vit_base_patch32_plus_256.untrained)r<  r?  rA  z#vit_base_patch16_plus_240.untrainedz$vit_small_patch16_36x1_224.untrainedz$vit_small_patch16_18x2_224.untrainedz#vit_base_patch16_18x2_224.untrainedz)eva_large_patch14_196.in22k_ft_in22k_in1kmit)r      r[  )rG  rL  r   rD  r?  rA  z)eva_large_patch14_336.in22k_ft_in22k_in1k)rG  rL  r   rD  r?  rA  rP  z#eva_large_patch14_196.in22k_ft_in1kz#eva_large_patch14_336.in22k_ft_in1kzflexivit_small.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k.npz)r<  rH  rG  r?  rA  zflexivit_small.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_600ep.npzzflexivit_small.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_s_i1k_300ep.npzzflexivit_base.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k.npzzflexivit_base.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_600ep.npzzflexivit_base.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i1k_300ep.npzzflexivit_base.1000ep_in21kzMhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_1000ep.npz)r<  rH  rG  r?  rA  r   zflexivit_base.300ep_in21kzLhttps://storage.googleapis.com/big_vision/flexivit/flexivit_b_i21k_300ep.npzzflexivit_large.1200ep_in1kzEhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k.npzzflexivit_large.600ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_600ep.npzzflexivit_large.300ep_in1kzKhttps://storage.googleapis.com/big_vision/flexivit/flexivit_l_i1k_300ep.npzzflexivit_base.patch16_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b16_i21k_300ep.npzzflexivit_base.patch30_in21kzIhttps://storage.googleapis.com/big_vision/flexivit/vit_b30_i21k_300ep.npzz!vit_base_patch16_xp_224.untrainedz"vit_large_patch14_xp_224.untrainedz!vit_huge_patch14_xp_224.untrainedzvit_base_patch16_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth)r<  rG  rL  r   rD  r   zvit_large_patch16_224.maezFhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_large.pthzvit_huge_patch14_224.maezEhttps://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_huge.pthz#vit_huge_patch14_gap_224.in1k_ijepaz?https://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.14-300e.pth.tar)r<  rL  r   rD  r   z$vit_huge_patch14_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.h.14-900e.pth.tarz#vit_huge_patch16_gap_448.in1k_ijepazEhttps://dl.fbaipublicfiles.com/ijepa/IN1K-vit.h.16-448px-300e.pth.tar)r<  rL  r?  rA  r   rD  r   z%vit_giant_patch16_gap_224.in22k_ijepaz@https://dl.fbaipublicfiles.com/ijepa/IN22K-vit.g.16-600e.pth.tarz$vit_base_patch32_siglip_256.v2_webli)rG  r?  r   z$vit_base_patch16_siglip_224.v2_webliz!vit_base_patch16_siglip_224.webliz$vit_base_patch16_siglip_256.v2_webliz!vit_base_patch16_siglip_256.webliz&vit_base_patch16_siglip_256.webli_i18nz$vit_base_patch16_siglip_384.v2_webliz!vit_base_patch16_siglip_384.webliz$vit_base_patch16_siglip_512.v2_webli)r   rT  rT  z!vit_base_patch16_siglip_512.webliz%vit_large_patch16_siglip_256.v2_webliz"vit_large_patch16_siglip_256.webliz%vit_large_patch16_siglip_384.v2_webliz"vit_large_patch16_siglip_384.webliz%vit_large_patch16_siglip_512.v2_webliz&vit_so400m_patch14_siglip_224.v2_webliz#vit_so400m_patch14_siglip_224.webliz&vit_so400m_patch14_siglip_378.v2_webliz#vit_so400m_patch14_siglip_378.webliz#vit_so400m_patch14_siglip_384.webliz&vit_so400m_patch16_siglip_256.v2_webliz(vit_so400m_patch16_siglip_256.webli_i18nz&vit_so400m_patch16_siglip_384.v2_webliz&vit_so400m_patch16_siglip_512.v2_webliz(vit_giantopt_patch16_siglip_256.v2_webliz(vit_giantopt_patch16_siglip_384.v2_webliz(vit_base_patch32_siglip_gap_256.v2_webliz(vit_base_patch16_siglip_gap_224.v2_webliz%vit_base_patch16_siglip_gap_224.webliz(vit_base_patch16_siglip_gap_256.v2_webliz%vit_base_patch16_siglip_gap_256.webliz*vit_base_patch16_siglip_gap_256.webli_i18nz(vit_base_patch16_siglip_gap_384.v2_webliz%vit_base_patch16_siglip_gap_384.webliz(vit_base_patch16_siglip_gap_512.v2_webliz%vit_base_patch16_siglip_gap_512.webliz)vit_large_patch16_siglip_gap_256.v2_webliz&vit_large_patch16_siglip_gap_256.webliz)vit_large_patch16_siglip_gap_384.v2_webliz&vit_large_patch16_siglip_gap_384.webliz)vit_large_patch16_siglip_gap_512.v2_webliz*vit_so400m_patch14_siglip_gap_224.v2_webliz'vit_so400m_patch14_siglip_gap_224.webliz*vit_so400m_patch14_siglip_gap_224.pali_mixz)vit_so400m_patch14_siglip_gap_224.pali_ptz-vit_so400m_patch14_siglip_gap_224.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_224.pali2_10b_ptz*vit_so400m_patch14_siglip_gap_378.v2_webliz'vit_so400m_patch14_siglip_gap_378.webliz'vit_so400m_patch14_siglip_gap_384.webliz*vit_so400m_patch14_siglip_gap_448.pali_mixz)vit_so400m_patch14_siglip_gap_448.pali_ptz2vit_so400m_patch14_siglip_gap_448.pali_refcoco_segz-vit_so400m_patch14_siglip_gap_448.pali_ocrvqaz-vit_so400m_patch14_siglip_gap_448.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_448.pali2_10b_ptz0vit_so400m_patch14_siglip_gap_448.pali2_3b_docciz1vit_so400m_patch14_siglip_gap_448.pali2_10b_docciz)vit_so400m_patch14_siglip_gap_896.pali_pt)r     r\  z2vit_so400m_patch14_siglip_gap_896.pali_refcoco_segz-vit_so400m_patch14_siglip_gap_896.pali_ocrvqaz-vit_so400m_patch14_siglip_gap_896.pali2_3b_ptz.vit_so400m_patch14_siglip_gap_896.pali2_10b_ptz*vit_so400m_patch16_siglip_gap_256.v2_webliz,vit_so400m_patch16_siglip_gap_256.webli_i18nz*vit_so400m_patch16_siglip_gap_384.v2_webliz*vit_so400m_patch16_siglip_gap_512.v2_webliz,vit_giantopt_patch16_siglip_gap_256.v2_webliz,vit_giantopt_patch16_siglip_gap_384.v2_webliz+vit_so400m_patch14_siglip_378.webli_ft_in1kz/vit_so400m_patch14_siglip_gap_378.webli_ft_in1kz,vit_xsmall_patch16_clip_224.tinyclip_yfcc15m)rG  rL  r   rD  r   z.vit_medium_patch32_clip_224.tinyclip_laion400mz,vit_medium_patch16_clip_224.tinyclip_yfcc15mz/vit_betwixt_patch32_clip_224.tinyclip_laion400mz%vit_wee_patch16_reg1_gap_256.sbb_in1kz&vit_pwee_patch16_reg1_gap_256.sbb_in1kz1vit_little_patch16_reg1_gap_256.sbb_in12k_ft_in1kz)vit_little_patch16_reg1_gap_256.sbb_in12k)rG  r   r?  rA  z(vit_little_patch16_reg4_gap_256.sbb_in1kz(vit_medium_patch16_reg1_gap_256.sbb_in1kz1vit_medium_patch16_reg4_gap_256.sbb_in12k_ft_in1kz(vit_medium_patch16_reg4_gap_256.sbb_in1kz)vit_medium_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1kz0vit_mediumd_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_mediumd_patch16_reg4_gap_256.sbb_in12kz8vit_mediumd_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz)vit_betwixt_patch16_reg1_gap_256.sbb_in1kz8vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12k_ft_in1kz2vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1kz)vit_betwixt_patch16_reg4_gap_256.sbb_in1kz0vit_betwixt_patch16_reg4_gap_256.sbb2_e200_in12kz*vit_betwixt_patch16_reg4_gap_256.sbb_in12kz8vit_betwixt_patch16_reg4_gap_384.sbb2_e200_in12k_ft_in1kz'vit_base_patch16_reg4_gap_256.untrained)r?  z6vit_so150m_patch16_reg4_gap_256.sbb_e250_in12k_ft_in1kz.vit_so150m_patch16_reg4_gap_256.sbb_e250_in12kz6vit_so150m_patch16_reg4_gap_384.sbb_e250_in12k_ft_in1kz)vit_so150m_patch16_reg4_map_256.untrainedz7vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12k_ft_in1kz/vit_so150m2_patch16_reg1_gap_256.sbb_e200_in12kz7vit_so150m2_patch16_reg1_gap_384.sbb_e200_in12k_ft_in1kz7vit_so150m2_patch16_reg1_gap_448.sbb_e200_in12k_ft_in1kz$vit_intern300m_patch14_448.ogvl_distz$vit_intern300m_patch14_448.ogvl_2pt5z aimv2_large_patch14_224.apple_pt)rG  r   rD  rL  rA  r   z%aimv2_large_patch14_224.apple_pt_distzaimv2_huge_patch14_224.apple_ptzaimv2_1b_patch14_224.apple_ptzaimv2_3b_patch14_224.apple_ptz aimv2_large_patch14_336.apple_pt)rG  r   rD  rL  r?  rA  r   z%aimv2_large_patch14_336.apple_pt_distzaimv2_huge_patch14_336.apple_ptzaimv2_1b_patch14_336.apple_ptzaimv2_3b_patch14_336.apple_ptz aimv2_large_patch14_448.apple_ptzaimv2_huge_patch14_448.apple_ptzaimv2_1b_patch14_448.apple_ptzaimv2_3b_patch14_448.apple_ptztest_vit.r160_in1k)r      r]  ztest_vit2.r160_in1kztest_vit3.r160_in1kztest_vit4.r160_in1k)r?  rA  z$beit3_base_patch16_224.in22k_ft_in1kz-beit3_base_patch16_224.indomain_in22k_ft_in1kz%beit3_large_patch16_224.in22k_ft_in1kz.beit3_large_patch16_224.indomain_in22k_ft_in1kz!beit3_giant_patch14_224.untrained)r<  r   rD  rA  z!beit3_giant_patch14_336.untrained)r<  r?  r   rD  rA  zbeit3_base_patch16_224.pt)z"beit3_base_patch16_224.indomain_ptzbeit3_large_patch16_224.ptz#beit3_large_patch16_224.indomain_ptrW  	quickgelurG  _clip__clip_quickgelu_TIMM_USE_NAFLEXVITfalsetruevariant
pretrained
use_naflex	NaFlexVitc           	      0   |t         }|rddlm}  || |fi |S |j                  dd      }d| v rt	        t
        dd      }nt
        }|j                  d	d
      }d| v r|j                  dd       dk7  rd}t        t        | |f||t        |d      d|S )Nr)   )_create_naflexvit_from_classicout_indicesr   flexir  F)r  r  pretrained_strictTsiglipr   r   getter)rj  feature_cls)pretrained_filter_fnrl  feature_cfg)
_USE_NAFLEX_DEFAULT	naflexvitri  r  r   r;  r  r*   r3   r  )rd  re  rf  rE  ri  rj  
_filter_fnstricts           rD   _create_vision_transformerrv  \
  s     (
=-gzLVLL**]A.K' 1W\]
)
 ZZ+T2F7vzz->%G ( [hG  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Tiny (Vit-Ti/16)
    r      r   r   r   r   r   rY   re  )vit_tiny_patch16_224r  rv  re  rE  
model_argsr  s       rD   rz  rz  
  8     s"JJ&s*sX\]gXrkqXrsELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z% ViT-Tiny (Vit-Ti/16) @ 384x384.
    r   rx  r   r   ry  re  )vit_tiny_patch16_384r{  r|  s       rD   r  r  
  r~  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Small (ViT-S/32)
        rI  r      ry  re  )vit_small_patch32_224r{  r|  s       rD   r  r  
  8     s"JJ&t:tY]^hYslrYstELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z& ViT-Small (ViT-S/32) at 384x384.
    r  rI  r   r  ry  re  )vit_small_patch32_384r{  r|  s       rD   r  r  
  r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S ) ViT-Small (ViT-S/16)
    r   rI  r   r  ry  re  )vit_small_patch16_224r{  r|  s       rD   r  r  
  r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )r  r   rI  r   r  ry  re  )vit_small_patch16_384r{  r|  s       rD   r  r  
  r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Small (ViT-S/8)
       rI  r   r  ry  re  )vit_small_patch8_224r{  r|  s       rD   r  r  
  s8     cqIJ&s*sX\]gXrkqXrsELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Base (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k, source https://github.com/google-research/vision_transformer.
    r  r   r   ry  re  )vit_base_patch32_224r{  r|  s       rD   r  r  
  8    
 s"KJ&s*sX\]gXrkqXrsELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Base model (ViT-B/32) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r  r   r   ry  re  )vit_base_patch32_384r{  r|  s       rD   r  r  
  r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
    r   r   r   ry  re  )vit_base_patch16_224r{  r|  s       rD   r  r  
  r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Base model (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r   r   r   ry  re  )vit_base_patch16_384r{  r|  s       rD   r  r  
  r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Base (ViT-B/8) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
    r  r   r   ry  re  )vit_base_patch8_224r{  r|  s       rD   r  r  
  s8    
 crJJ&rrW[\fWqjpWqrELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )zo ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929). No pretrained weights.
    r  rU     r   ry  re  )vit_large_patch32_224r{  r|  s       rD   r  r  
  8     t2LJ&t:tY]^hYslrYstELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Large model (ViT-L/32) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r  rU  r  r   ry  re  )vit_large_patch32_384r{  r|  s       rD   r  r  
  8    
 t2LJ&t:tY]^hYslrYstELrE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 224x224, source https://github.com/google-research/vision_transformer.
    r   rU  r  ry  re  )vit_large_patch16_224r{  r|  s       rD   r  r    r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z ViT-Large model (ViT-L/16) from original paper (https://arxiv.org/abs/2010.11929).
    ImageNet-1k weights fine-tuned from in21k @ 384x384, source https://github.com/google-research/vision_transformer.
    r   rU  r  ry  re  )vit_large_patch16_384r{  r|  s       rD   r  r    r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )z  ViT-Large model (ViT-L/14)
       rU  r  r   ry  re  )vit_large_patch14_224r{  r|  s       rD   r  r    r  rE   c           	      R    t        dddd      }t        dd| it        |fi |}|S )zW ViT-Huge model (ViT-H/14) from original paper (https://arxiv.org/abs/2010.11929).
    r  rV  r  r   ry  re  )vit_huge_patch14_224r{  r|  s       rD   r  r  #  s8     t2LJ&s*sX\]gXrkqXrsELrE   c           	      T    t        ddddd      }t        dd| it        |fi |}|S )	zq ViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    tE]t@(   r   r   r   rZ   r   rY   re  )vit_giant_patch14_224r{  r|  s       rD   r  r  ,  s;     tuBZ\]J&t:tY]^hYslrYstELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	zq ViT-Gigantic (big-G) model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    r    ;;@0   r   r  re  )vit_gigantic_patch14_224r{  r|  s       rD   r  r  5  sG     tuBZ\]J&"Y/9Y=A*=WPV=WYELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )z ViT-Base (ViT-B/16) from original paper (https://arxiv.org/abs/2010.11929).
    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
    r   r   r   F)r   r   r   rY   r[   re  )vit_base_patch16_224_miilr{  r|  s       rD   r  r  ?  sG    
 s"UZ[J&#Z0:Z>B:>XQW>XZELrE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )zB ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 240x240
    r   rT  r   r  Fr   rE  	r   r   r   rY   r   r   r[   r7   r   re  )vit_medium_patch16_gap_240r{  r|  s       rD   r  r  J  U     B!EtULJ '$[1;[?CJ?YRX?Y[ELrE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )zB ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 256x256
    r   rT  r   r  Fr   rE  r  re  vit_medium_patch16_gap_256r{  r|  s       rD   r  r  V  r  rE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )zB ViT-Medium (ViT-M/16) w/o class token, w/ avg-pool @ 384x384
    r   rT  r   r  Fr   rE  r  re  )vit_medium_patch16_gap_384r{  r|  s       rD   r  r  b  r  rE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )zC ViT-Betwixt (ViT-b/16) w/o class token, w/ avg-pool @ 256x256
    r   rX  r   
   Fr   rE  r  re  r  r{  r|  s       rD   vit_betwixt_patch16_gap_256r  n  sU     B"%EtULJ '$[1;[?CJ?YRX?Y[ELrE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	z@ ViT-Base (ViT-B/16) w/o class token, w/ avg-pool @ 224x224
    r   r   r   Fr   r   r   r   rY   r   r   r   re  )rQ  r{  r|  s       rD   rQ  rQ  z  sP     B"%]blqsJ&"Y/9Y=A*=WPV=WYELrE   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
z; ViT-Huge model (ViT-H/14) w/ no class token, avg pool
    r  rV  r  r   Fr   r  re  )vit_huge_patch14_gap_224r{  r|  s       rD   r  r    P     R25^cmrtJ&"Y/9Y=A*=WPV=WYELrE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	zE ViT-Huge model (ViT-H/16) w/ no class token, avg pool @ 448x448
    r   rV  r  Fr   r  re  )vit_huge_patch16_gap_448r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
zH ViT-Giant (little-gg) model (ViT-g/16) w/ no class token, avg pool
    r   r  r  r  Fr   r   r   r   rY   rZ   r   r   r   re  )vit_giant_patch16_gap_224r{  r|  s       rD   r  r    sR     R2ue=J '#Z0:Z>B:>XQW>XZELrE   c           
      t    t        ddddt        t        d            }t        	 d	d| it        |fi |}|S )
NrN  r  r  TrJ   epsr   r   rY   r   rd   re  )vit_xsmall_patch16_clip_224r  r   r   rv  r|  s       rD   r  r    P     2TV]^gmqVrsJ&%\2<\@DZ@ZSY@Z\ELrE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )Nr  rT  r   r  TrJ   r  r   r   r   rY   r   rd   re  )vit_medium_patch32_clip_224r  r|  s       rD   r  r    sV     B!dW^_hnrWsuJ&%\2<\@DZ@ZSY@Z\ELrE   c           
      t    t        ddddt        t        d            }t        	 d	d| it        |fi |}|S )
NrT  r   r  TrJ   r  r  re  )vit_medium_patch16_clip_224r  r|  s       rD   r  r    r  rE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )Nr  rX  r   r  TrJ   r  r  re  )vit_betwixt_patch32_clip_224r  r|  s       rD   r  r    sV     B"tX_`iosXtvJ&&]3=]AEjA[TZA[]ELrE   c                 v    t        dddddt        t        d            }t        	 d	d| it        |fi |}|S )
) ViT-B/32 CLIP image tower @ 224x224
    r  r   r   TrJ   r  r  re  )vit_base_patch32_clip_224r  r|  s       rD   r  r    V     B"tX_`iosXtvJ&#Z0:Z>B:>XQW>XZELrE   c                 v    t        dddddt        t        d            }t        	 d	d| it        |fi |}|S )
z) ViT-B/32 CLIP image tower @ 256x256
    r  r   r   TrJ   r  r  re  )vit_base_patch32_clip_256r  r|  s       rD   r  r    r  rE   c                 v    t        dddddt        t        d            }t        	 d	d| it        |fi |}|S )
z) ViT-B/32 CLIP image tower @ 384x384
    r  r   r   TrJ   r  r  re  )vit_base_patch32_clip_384r  r|  s       rD   r  r    r  rE   c                 v    t        dddddt        t        d            }t        	 d	d| it        |fi |}|S )
z) ViT-B/32 CLIP image tower @ 448x448
    r  r   r   TrJ   r  r  re  )vit_base_patch32_clip_448r  r|  s       rD   r  r    r  rE   c                 v    t        dddddt        t        d            }t        	 d	d| it        |fi |}|S )
z ViT-B/16 CLIP image tower
    r   r   r   TrJ   r  r  re  )vit_base_patch16_clip_224r  r|  s       rD   r  r    r  rE   c                 v    t        dddddt        t        d            }t        	 d	d| it        |fi |}|S )
z) ViT-B/16 CLIP image tower @ 384x384
    r   r   r   TrJ   r  r  re  )vit_base_patch16_clip_384r  r|  s       rD   r  r    r  rE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )z5 ViT-Base (ViT-B/16+) CLIP image tower @ 240x240
    r   r\  r   r  TrJ   r  r  re  )vit_base_patch16_plus_clip_240r  r|  s       rD   r  r    sV     B"tX_`iosXtvJ&(_5?_CG
C]V\C]_ELrE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )z1 ViT-Large model (ViT-L/14) CLIP image tower
    r  rU  r  r   TrJ   r  r  re  )vit_large_patch14_clip_224r  r|  s       rD   r  r    V     R2Y`ajptYuwJ&$[1;[?CJ?YRX?Y[ELrE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )z; ViT-Large model (ViT-L/14) CLIP image tower @ 336x336
    r  rU  r  r   TrJ   r  r  re  )vit_large_patch14_clip_336r  r|  s       rD   r  r  %  r  rE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )z1 ViT-Huge model (ViT-H/14) CLIP image tower.
    r  rV  r  r   TrJ   r  r  re  )vit_huge_patch14_clip_224r  r|  s       rD   r  r  0  V     R2Y`ajptYuwJ&#Z0:Z>B:>XQW>XZELrE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )z: ViT-Huge model (ViT-H/14) CLIP image tower @ 336x336
    r  rV  r  r   TrJ   r  r  re  )vit_huge_patch14_clip_336r  r|  s       rD   r  r  ;  r  rE   c                 v    t        dddddt        t        d            }t        	 d
d	| it        |fi |}|S )z: ViT-Huge model (ViT-H/14) CLIP image tower @ 378x378
    r  rV  r  r   TrJ   r  r  re  )vit_huge_patch14_clip_378r  r|  s       rD   r  r  F  r  rE   c                 x    t        ddddddt        t        d      	      }t        	 dd
| it        |fi |}|S )z ViT-Giant (little-g) model (ViT-g/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    Pretrained weights from CLIP image tower.
    r  r  r  r  r   TrJ   r  r   r   rZ   r   rY   r   rd   re  )vit_giant_patch14_clip_224r  r|  s       rD   r  r  Q  sX    
 bBY]9$/J '$[1;[?CJ?YRX?Y[ELrE   c                 x    t        ddddddt        t        d      	      }t        	 dd
| it        |fi |}|S )z ViT-bigG model (ViT-G/14) from `Scaling Vision Transformers` - https://arxiv.org/abs/2106.04560
    Pretrained weights from CLIP image tower.
    r  r  r  r  r   TrJ   r  r  re  )vit_gigantic_patch14_clip_224r  r|  s       rD   r  r  _  sX    
 bBY]9$/J ''^4>^BFzB\U[B\^ELrE   c                 x    t        dddddt        t        d      d      }t        	 d
d	| it        |fi |}|S )r  r  r   r   TrJ   r  
quick_gelur   r   r   rY   r   rd   rc   re  )#vit_base_patch32_clip_quickgelu_224r  r|  s       rD   r   r   m  W     B"t9$/<J '-d:DdHLZHb[aHbdELrE   c                 x    t        dddddt        t        d      d      }t        	 d
d	| it        |fi |}|S )z0 ViT-B/16 CLIP image tower w/ QuickGELU act
    r   r   r   TrJ   r  r  r  re  )#vit_base_patch16_clip_quickgelu_224r  r|  s       rD   r  r  z  r  rE   c                 x    t        dddddt        t        d      d	      }t        	 dd
| it        |fi |}|S )zB ViT-Large model (ViT-L/14) CLIP image tower w/ QuickGELU act
    r  rU  r  r   TrJ   r  r  r  re  )$vit_large_patch14_clip_quickgelu_224r  r|  s       rD   r  r    W     R29$/<J '.e;EeIMjIc\bIceELrE   c                 x    t        dddddt        t        d      d	      }t        	 dd
| it        |fi |}|S )zL ViT-Large model (ViT-L/14) CLIP image tower @ 336x336 w/ QuickGELU act
    r  rU  r  r   TrJ   r  r  r  re  )$vit_large_patch14_clip_quickgelu_336r  r|  s       rD   r  r    r  rE   c                 x    t        dddddt        t        d      d	      }t        	 dd
| it        |fi |}|S )zB ViT-Huge model (ViT-H/14) CLIP image tower w/ QuickGELU act.
    r  rV  r  r   TrJ   r  r  r  re  )#vit_huge_patch14_clip_quickgelu_224r  r|  s       rD   r
  r
    W     R29$/<J '-d:DdHLZHb[aHbdELrE   c                 x    t        dddddt        t        d      d	      }t        	 dd
| it        |fi |}|S )zK ViT-Huge model (ViT-H/14) CLIP image tower @ 378x378 w/ QuickGELU act
    r  rV  r  r   TrJ   r  r  r  re  )#vit_huge_patch14_clip_quickgelu_378r  r|  s       rD   r  r    r  rE   c                 z    t        ddddddt        t        d      d	
      }t        	 dd| it        |fi |}|S )z0 ViT-bigG model (ViT-G/14) w/ QuickGELU act
    r  r  r  r  r   TrJ   r  r  )r   r   rZ   r   rY   r   rd   rc   re  )'vit_gigantic_patch14_clip_quickgelu_224r  r|  s       rD   r  r    s[     bBY]9$/<J '1h>HhLPQ[Lf_eLfhELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	z ViT-Base (ViT-B/32+)
    r  r\  r   r  rJ   r   r   r   rY   r7   re  )vit_base_patch32_plus_256r{  r|  s       rD   r  r    G     s"X\]J&#Z0:Z>B:>XQW>XZELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	z ViT-Base (ViT-B/16+)
    r   r\  r   r  rJ   r  re  )vit_base_patch16_plus_240r{  r|  s       rD   r  r    r  rE   c                 f    t        dddddddt        d	      }t        	 d	d| it        |fi |}|S )
z/ ViT-Base (ViT-B/16) w/ residual post-norm
    r   r   r   FrJ   r   )	r   r   r   rY   r[   r7   r   r  r   re  )vit_base_patch16_rpn_224)r  r   rv  r|  s       rD   r  r    sV     B"uZ^LeEJ '"Y/9Y=A*=WPV=WYELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	a   ViT-Base w/ LayerScale + 36 x 1 (36 block serial) config. Experimental, may remove.
    Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
    r   rI  $   r  rJ   r  re  )vit_small_patch16_36x1_224r{  r|  s       rD   r  r    sG     s"W[\J&$[1;[?CJ?YRX?Y[ELrE   c           	      `    t        dddddt              }t        	 dd| it        |fi |}|S )	a   ViT-Small w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
    Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    Paper focuses on 24x2 + 48x1 for 'Small' width but those are extremely slow.
    r   rI     r  rJ   r   r   r   rY   r7   r  re  )vit_small_patch16_18x2_224r  r   rv  r|  s       rD   r  r    sM     B!XkmJ&$[1;[?CJ?YRX?Y[ELrE   c           	      `    t        dddddt              }t        	 dd| it        |fi |}|S )	z ViT-Base w/ LayerScale + 18 x 2 (36 block parallel) config. Experimental, may remove.
    Based on `Three things everyone should know about Vision Transformers` - https://arxiv.org/abs/2203.09795
    r   r   r  r   rJ   r  re  )vit_base_patch16_18x2_224r  r|  s       rD   r!  r!    sM    
 B"$YlnJ&#Z0:Z>B:>XQW>XZELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	zG EVA-large model https://arxiv.org/abs/2211.07636 /via MAE MIM pretrainr  rU  r  r   r   r   r   r   rY   r   re  )eva_large_patch14_196r{  r|  s       rD   r$  r$    sF     t2Y^_J&V,6V:>z:TV:TVELrE   c           	      T    t        ddddd      }t        dd| it        |fi |}|S )	zF EVA-large model https://arxiv.org/abs/2211.07636 via MAE MIM pretrainr  rU  r  r   r   r#  re  )eva_large_patch14_336r{  r|  s       rD   r&  r&    s;     t2Y^_J&t:tY]^hYslrYstELrE   c           	      T    t        ddddd      }t        dd| it        |fi |}|S )	z FlexiViT-Small
    r   rI  r   r  Tr   r   r   rY   r   re  )flexivit_smallr{  r|  s       rD   r)  r)     s;     s"Z^_J&mJmRVWaRlekRlmELrE   c           	      T    t        ddddd      }t        dd| it        |fi |}|S )z FlexiViT-Base
    r   r   r   Tr(  re  )flexivit_baser{  r|  s       rD   r+  r+  )  s;     s"[_`J&l:lQUV`QkdjQklELrE   c           	      T    t        ddddd      }t        dd| it        |fi |}|S )z FlexiViT-Large
    r   rU  r  Tr(  re  )flexivit_larger{  r|  s       rD   r-  r-  2  s;     t2\`aJ&mJmRVWaRlekRlmELrE   c                 p    t        ddddddt        t        dd
      }t        	 dd| it        |fi |}|S )	H ViT-Large model (ViT-L/14) w/ parallel blocks and qk norm enabled.
    r   r   r   TF
r   r   r   rY   r   r   rd   r  r[   r\   re  )vit_base_patch16_xp_224r  r   r   rv  r|  s       rD   r1  r1  ;  sX     B"t\`%9ESWJ '!X.8X<@<Vv<VXELrE   c                 p    t        ddddddt        t        dd
      }t        	 d	d| it        |fi |}|S )
r/  r  rU  r  r   TFr0  re  )vit_large_patch14_xp_224r2  r|  s       rD   r4  r4  H  sY     R2]a%9ESWJ '"Y/9Y=A*=WPV=WYELrE   c                 p    t        ddddddt        t        dd
      }t        	 d	d| it        |fi |}|S )
zG ViT-Huge model (ViT-H/14) w/ parallel blocks and qk norm enabled.
    r  rV  r  r   TFr0  re  )vit_huge_patch14_xp_224r2  r|  s       rD   r6  r6  U  sX     R2]a%9ESWJ '!X.8X<@<Vv<VXELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	z ViT-S/14 for DINOv2
    r  rI  r   r  rJ   r  re  )vit_small_patch14_dinov2r{  r|  s       rD   r8  r8  b  sG     s"W[\J&"Y/9Y=A*=WPV=WYELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )z ViT-B/14 for DINOv2
    r  r   r   rJ   r  re  )vit_base_patch14_dinov2r{  r|  s       rD   r:  r:  l  sF     s"X\]J&!X.8X<@<Vv<VXELrE   c           	      V    t        ddddd      }t        	 dd| it        |fi |}|S )	z ViT-L/14 for DINOv2
    r  rU  r  r   rJ   r  re  )vit_large_patch14_dinov2r{  r|  s       rD   r<  r<  v  sG     t2Y]^J&"Y/9Y=A*=WPV=WYELrE   c           
          t        ddddddt        t        j                        }t	        	 d	d| it        |fi |}|S )
 ViT-G/14 for DINOv2
    r     r  r  rJ   h˹WU@)r   r   r   rY   r7   rZ   re   rc   re  )vit_giant_patch14_dinov2r  r   r=   SiLUrv  r|  s       rD   rA  rA    sV     R24J '"Y/9Y=A*=WPV=WYELrE   c           	      Z    t        ddddddd      }t        	 d
d	| it        |fi |}|S )z( ViT-S/14 for DINOv2 w/ 4 registers
    r  rI  r   r  rJ   r  Tr   r   r   rY   r7   r   r   re  )vit_small_patch14_reg4_dinov2r{  r|  s       rD   rF  rF    sP     B!TJ ''^4>^BFzB\U[B\^ELrE   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
z( ViT-B/14 for DINOv2 w/ 4 registers
    r  r   r   rJ   r  TrE  re  )vit_base_patch14_reg4_dinov2r{  r|  s       rD   rH  rH    sP     B"$TJ '&]3=]AEjA[TZA[]ELrE   c           	      Z    t        ddddddd      }t        	 d
d	| it        |fi |}|S )z( ViT-L/14 for DINOv2 w/ 4 registers
    r  rU  r  r   rJ   r  TrE  re  )vit_large_patch14_reg4_dinov2r{  r|  s       rD   rJ  rJ    sP     R24TJ ''^4>^BFzB\U[B\^ELrE   c                     t        ddddddt        t        j                  dd	
      }t	        	 dd
| it        |fi |}|S )r>  r  r?  r  r  rJ   r@  r  T)
r   r   r   rY   r7   rZ   re   rc   r   r   re  )vit_giant_patch14_reg4_dinov2rB  r|  s       rD   rL  rL    s\     R24[f"''aPTJ ''^4>^BFzB\U[B\^ELrE   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
Nr  r   r   Fr   	gelu_tanhr   r   r   rY   r   r   rc   re  )vit_base_patch32_siglip_256r{  r|  s       rD   rP  rP    sO    B"%]bJ '%\2<\@DZ@ZSY@Z\ELrE   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	Nr   r   r   Fr   r   r   r   rY   r   r   re  )vit_base_patch16_siglip_224r{  r|  s       rD   rS  rS    L    B"%]bJ '%\2<\@DZ@ZSY@Z\ELrE   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	Nr   r   r   Fr   rR  re  )vit_base_patch16_siglip_256r{  r|  s       rD   rV  rV    rT  rE   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	Nr   r   r   Fr   rR  re  )vit_base_patch16_siglip_384r{  r|  s       rD   rX  rX    rT  rE   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	Nr   r   r   Fr   rR  re  )vit_base_patch16_siglip_512r{  r|  s       rD   rZ  rZ    rT  rE   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	Nr   rU  r  Fr   rR  re  )vit_large_patch16_siglip_256r{  r|  s       rD   r\  r\    L    R25^cJ '&]3=]AEjA[TZA[]ELrE   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	Nr   rU  r  Fr   rR  re  )vit_large_patch16_siglip_384r{  r|  s       rD   r_  r_    r]  rE   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
Nr   rU  r  Fr   rN  rO  re  )vit_large_patch16_siglip_512r{  r|  s       rD   ra  ra    sO    R25^cJ '&]3=]AEjA[TZA[]ELrE   c           	      Z    t        ddddddd      }t        	 d
d	| it        |fi |}|S )Nr       r   爅ZӼ@Fr   r   r   r   rY   rZ   r   r   re  )vit_so400m_patch14_siglip_224r{  r|  s       rD   rg  rg    O    R2]bpuJ ''^4>^BFzB\U[B\^ELrE   c           	      Z    t        ddddddd      }t        	 d
d	| it        |fi |}|S )Nr  rc  rd  r   re  Fr   rf  re  )vit_so400m_patch14_siglip_378r{  r|  s       rD   rj  rj  %  sQ     R2]bpuJ ''^4>^BFzB\U[B\^ELrE   c           	      Z    t        ddddddd      }t        	 d
d	| it        |fi |}|S )Nr  rc  rd  r   re  Fr   rf  re  )vit_so400m_patch14_siglip_384r{  r|  s       rD   rl  rl  0  rh  rE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )Nr   rc  rd  re  Fr   rN  r   r   r   rY   rZ   r   r   rc   re  )vit_so400m_patch16_siglip_256r{  r|  s       rD   ro  ro  :  R    R2]bpuJ ''^4>^BFzB\U[B\^ELrE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )Nr   rc  rd  re  Fr   rN  rn  re  )vit_so400m_patch16_siglip_384r{  r|  s       rD   rr  rr  E  rp  rE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )Nr   rc  rd  re  Fr   rN  rn  re  )vit_so400m_patch16_siglip_512r{  r|  s       rD   rt  rt  P  rp  rE   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
Nr   r?  r  Fr   rN  rO  re  )vit_giantopt_patch16_siglip_256r{  r|  s       rD   rv  rv  [  O    R25^cJ ')`6@`DHD^W]D^`ELrE   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
Nr   r?  r  Fr   rN  rO  re  )vit_giantopt_patch16_siglip_384r{  r|  s       rD   ry  ry  f  rw  rE   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
Nr  r   r   Fr   rN  r   r   r   rY   r   r   r   rc   re  )vit_base_patch32_siglip_gap_256r{  r|  s       rD   r|  r|  q  sR    B"%]blqJ ')`6@`DHD^W]D^`ELrE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	^ A SigLIP variant of ViT with global average pooling (GAP) instead of attention pooling (MAP).r   r   r   Fr   r  re  )vit_base_patch16_siglip_gap_224r{  r|  s       rD   r  r  |  Q     B"%]blqJ ')`6@`DHD^W]D^`ELrE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	r~  r   r   r   Fr   r  re  )vit_base_patch16_siglip_gap_256r{  r|  s       rD   r  r    r  rE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	r~  r   r   r   Fr   r  re  )vit_base_patch16_siglip_gap_384r{  r|  s       rD   r  r    r  rE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	r~  r   r   r   Fr   r  re  )vit_base_patch16_siglip_gap_512r{  r|  s       rD   r  r    r  rE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	r~  r   rU  r  Fr   r  re  ) vit_large_patch16_siglip_gap_256r{  r|  s       rD   r  r    Q     R25^cmrJ '*a7AaEI*E_X^E_aELrE   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	r~  r   rU  r  Fr   r  re  ) vit_large_patch16_siglip_gap_384r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
Nr   rU  r  Fr   rN  r{  re  ) vit_large_patch16_siglip_gap_512r{  r|  s       rD   r  r    sP    R255KJ '*a7AaEI*E_X^E_aELrE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )r~  r  rc  rd  r   re  Fr   r  re  )!vit_so400m_patch14_siglip_gap_224r{  r|  s       rD   r  r    R     R2ueJ '+b8BbFJ:F`Y_F`bELrE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )r~  r  rc  rd  r   re  Fr   r  re  )!vit_so400m_patch14_siglip_gap_378r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )r~  r  rc  rd  r   re  Fr   r  re  )!vit_so400m_patch14_siglip_gap_384r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )r~  r  rc  rd  r   re  Fr   r  re  )!vit_so400m_patch14_siglip_gap_448r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )r~  r  rc  rd  r   re  Fr   r  re  )!vit_so400m_patch14_siglip_gap_896r{  r|  s       rD   r  r    r  rE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )r~  r   rc  rd  re  Fr   rN  	r   r   r   rY   rZ   r   r   r   rc   re  )!vit_so400m_patch16_siglip_gap_256r{  r|  s       rD   r  r    sT     R2ue{J '+b8BbFJ:F`Y_F`bELrE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )Nr   rc  rd  re  Fr   rN  r  re  )!vit_so400m_patch16_siglip_gap_384r{  r|  s       rD   r  r    S    R2]b5KJ '+b8BbFJ:F`Y_F`bELrE   c                 ^    t        ddddddddd	      }t        	 d
d	| it        |fi |}|S )Nr   rc  rd  re  Fr   rN  r  re  )!vit_so400m_patch16_siglip_gap_512r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
Nr   r?  r  Fr   rN  r{  re  )#vit_giantopt_patch16_siglip_gap_256r{  r|  s       rD   r  r  '  P    R255KJ '-d:DdHLZHb[aHbdELrE   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
Nr   r?  r  Fr   rN  r{  re  )#vit_giantopt_patch16_siglip_gap_384r{  r|  s       rD   r  r  2  r  rE   c                 `    t        ddddddddd	d

      }t        	 dd| it        |fi |}|S )Nr   rN  r  r  rJ      FTr)   r   
r   r   r   rY   r7   rZ   r   r   r   r   re  )vit_wee_patch16_reg1_gap_256r{  r|  s       rD   r  r  =  sU    B!YZ$1%J '&]3=]AEjA[TZA[]ELrE   c                 j    t        dddddddddd	t        
      }t        	 dd| it        |fi |}|S )Nr   rN  r  rJ   r  FTr)   r   )r   r   r   rY   r7   rZ   r   r   r   r   r  re  )vit_pwee_patch16_reg1_gap_256)r  r   rv  r|  s       rD   r  r  H  sX    B!YZ$1%ZnJ ''^4>^BFzB\U[B\^ELrE   c                 `    t        ddddddddd	d

      }t        	 dd| it        |fi |}|S )Nr   @  r  r  rJ   ffffff@FTr)   r   r  re  )vit_little_patch16_reg1_gap_256r{  r|  s       rD   r  r  S  U    B!Y\$1%J ')`6@`DHD^W]D^`ELrE   c                 `    t        ddddddddd	d

      }t        	 dd| it        |fi |}|S )Nr   r  r  r  rJ   r  FTr  r   r  re  )vit_little_patch16_reg4_gap_256r{  r|  s       rD   r  r  ^  r  rE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rT  r   r  rJ   FTr)   r   	r   r   r   rY   r7   r   r   r   r   re  )vit_medium_patch16_reg1_gap_256r{  r|  s       rD   r  r  i  R    B!$1%J ')`6@`DHD^W]D^`ELrE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rT  r   r  rJ   FTr  r   r  re  )vit_medium_patch16_reg4_gap_256r{  r|  s       rD   r  r  t  r  rE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rT     r  rJ   FTr  r   r  re  ) vit_mediumd_patch16_reg4_gap_256r{  r|  s       rD   r  r    R    B!$1%J '*a7AaEI*E_X^E_aELrE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rT  r  r  rJ   FTr  r   r  re  ) vit_mediumd_patch16_reg4_gap_384r{  r|  s       rD   r  r    r  rE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rX  r   r  rJ   FTr)   r   r  re  ) vit_betwixt_patch16_reg1_gap_256r{  r|  s       rD   r  r    R    B"$$1%J '*a7AaEI*E_X^E_aELrE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rX  r   r  rJ   FTr  r   r  re  ) vit_betwixt_patch16_reg4_gap_256r{  r|  s       rD   r  r    r  rE   c                 ^    t        ddddddddd	
	      }t        	 dd| it        |fi |}|S )Nr   rX  r   r  rJ   FTr  r   r  re  ) vit_betwixt_patch16_reg4_gap_384r{  r|  s       rD   r  r    r  rE   c           
      \    t        dddddddd      }t        	 d
d	| it        |fi |}|S )Nr   r   r   FTr   r  )r   r   r   rY   r   r   r   r   re  )vit_base_patch16_reg4_gap_256r{  r|  s       rD   r  r    sP    B"%1J ''^4>^BFzB\U[B\^ELrE   c           
      \    t        dddddddd	      }t        	 dd
| it        |fi |}|S )F SO150M (shape optimized, but diff than paper def, optimized for GPU) r   r\  r  r  ~jt@Fr  r   )r   r   r   rY   rZ   r   r   r   re  )vit_so150m_patch16_reg4_map_256r{  r|  s       rD   r  r    sR     B"aUJ ')`6@`DHD^W]D^`ELrE   c                 ^    t        ddddddddd		      }t        	 dd
| it        |fi |}|S )r  r   r\  r  r  r  Fr  r   	r   r   r   rY   rZ   r   r   r   r   re  )vit_so150m_patch16_reg4_gap_256r{  r|  s       rD   r  r    T     B"aUEJ ')`6@`DHD^W]D^`ELrE   c                 ^    t        ddddddddd		      }t        	 dd
| it        |fi |}|S )r  r   r\  r  r  r  Fr  r   r  re  )vit_so150m_patch16_reg4_gap_384r{  r|  s       rD   r  r    r  rE   c                 `    t        dddddddddd	

      }t        	 dd| it        |fi |}|S )I SO150M v2 (shape optimized, but diff than paper def, optimized for GPU) r   @        NN@rJ   Fr)   r   
r   r   r   rY   rZ   r7   r[   r   r   r   re  ) vit_so150m2_patch16_reg1_gap_256r{  r|  s       rD   r  r    W     B"[_EaUJ '*a7AaEI*E_X^E_aELrE   c                 `    t        dddddddddd	

      }t        	 dd| it        |fi |}|S )r  r   r  r  r  r  rJ   Fr)   r   r  re  ) vit_so150m2_patch16_reg1_gap_384r{  r|  s       rD   r  r    r  rE   c                 `    t        dddddddddd	

      }t        	 dd| it        |fi |}|S )r  r   r  r  r  r  rJ   Fr)   r   r  re  ) vit_so150m2_patch16_reg1_gap_448r{  r|  s       rD   r  r    r  rE   c           	      Z    t        ddddddd      }t        	 d
d	| it        |fi |}|S )Nr  rU  r  r   g?FT)r   r   r   rY   r7   r   r   re  )vit_intern300m_patch14_448r{  r|  s       rD   r  r  	  sN    R2EDJ '$[1;[?CJ?YRX?Y[ELrE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S ) ViT Large AIM-v2 model
    r  rU  r  r  F      @r   silurJ   r  r   r   r   rY   r   r   rZ   r   r[   r_   rc   rd   r  re   re  )aimv2_large_patch14_224r  r   r   r   rv  r|  s       rD   r  r    p     R1%Y^EEUV\7-UY@ZflJ
 '!X.8X<@<Vv<VXELrE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S ) ViT Huge AIM-v2 model
    r  r?  r  r   FAfU@r   r  rJ   r  r  re  )aimv2_huge_patch14_224r  r|  s       rD   r  r  "  sp    
 R25Z_eeuX^7-UY@ZflJ
 ' W-7W;?
;Uf;UWELrE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S ) ViT 1B AIM-v2 model
    r     r  r   Fr  r   r  rJ   r  r  re  )aimv2_1b_patch14_224r  r|  s       rD   r  r  1  p     R25Z_EEUV\7-UY@ZflJ
 'U+5U9=j9SF9SUELrE   c                     t        dddddddddddt        t        d	      t        t        d	      t        
      }t	        	 dd| it        |fi |}|S ) ViT 3B AIM-v2 model
    r     r  Fr  r   r  rJ   r  r  re  )aimv2_3b_patch14_224r  r|  s       rD   r  r  ?  p     R25Z_eeuX^7-UY@ZflJ
 'U+5U9=j9SF9SUELrE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S )r  r  rU  r  r  Fr  r   r  rJ   r  r  re  )aimv2_large_patch14_336r  r|  s       rD   r  r  M  r  rE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S )r  r  r?  r  r   Fr  r   r  rJ   r  r  re  )aimv2_huge_patch14_336r  r|  s       rD   r  r  [  p     R25Z_eeuX^7-UY@ZflJ
 ' W-7W;?
;Uf;UWELrE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S )r  r  r  r  r   Fr  r   r  rJ   r  r  re  )aimv2_1b_patch14_336r  r|  s       rD   r  r  i  r  rE   c                     t        dddddddddddt        t        d	      t        t        d	      t        
      }t	        	 dd| it        |fi |}|S )r  r  r  r  Fr  r   r  rJ   r  r  re  )aimv2_3b_patch14_336r  r|  s       rD   r  r  w  r  rE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S )r  r  rU  r  r  Fr  r   r  rJ   r  r  re  )aimv2_large_patch14_448r  r|  s       rD   r  r    r  rE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S )r  r  r?  r  r   Fr  r   r  rJ   r  r  re  )aimv2_huge_patch14_448r  r|  s       rD   r  r    r  rE   c                     t        dddddddddddt        t        d	
      t        t        d	
      t              }t	        	 dd| it        |fi |}|S )r  r  r  r  r   Fr  r   r  rJ   r  r  re  )aimv2_1b_patch14_448r  r|  s       rD   r  r    r  rE   c                     t        dddddddddddt        t        d	      t        t        d	      t        
      }t	        	 dd| it        |fi |}|S )r  r  r  r  Fr  r   r  rJ   r  r  re  )aimv2_3b_patch14_448r  r|  s       rD   r  r    r  rE   c           	      V    t        dddddd      }t        d	d| it        |fi |}|S )
 ViT Test
    r   @   r  r   r   T)r   r   r   rY   rZ   r   re  )test_vitr{  r|  s       rD   r  r    s=     raSTgklJ&gjgDQ[Lf_eLfgELrE   c                 ^    t        ddddddddd	d

      }t        dd| it        |fi |}|S )r	  r   r
  r  r   r   Fr)   r   rJ   T)
r   r   r   rY   rZ   r   r   r   r7   r   re  )	test_vit2r{  r|  s       rD   r  r    sK     1QaU_ceJ 'hzhTR\Mg`fMghELrE   c                 ^    t        ddddddddd	d

      }t        dd| it        |fi |}|S )r	  r   `   	   r   r   Fr)   r   TrJ   )
r   r   r   rY   rZ   r   r   r   r   r7   re  )	test_vit3r{  r|  s       rD   r  r    sL     1QaUPTbfhJ 'hzhTR\Mg`fMghELrE   c                 `    t        dddddddddd	d
      }t        dd| it        |fi |}|S )r	  r   r  r  r   Fr)   r   rJ   Trmsnorm)r   r   r   rY   rZ   r   r   r   r7   r   rd   re  )	test_vit4r{  r|  s       rD   r  r    sM     1QaU_cJ
 'hzhTR\Mg`fMghELrE   c                 |    t        dddddddddt        t        d      	
      }t        dd
| it        |fi |}|S )zu BEiT3 Base model (ViT-Base size) with patch size 16x16.
    Remapped to VisionTransformer with scale_norm=True.
    r   r   r   r  Tr   rJ   r  
r   r   r   rY   rZ   r]   r^   r   r   rd   re  )beit3_base_patch16_224r  r|  s       rD   r  r    sS    
 B"TtQV9$/J
 'uJuZ^_iZtmsZtuELrE   c                 |    t        dddddddddt        t        d      	
      }t        dd
| it        |fi |}|S )zw BEiT3 Large model (ViT-Large size) with patch size 16x16.
    Remapped to VisionTransformer with scale_norm=True.
    r   rU  r  r  Tr   rJ   r  r  re  )beit3_large_patch16_224r  r|  s       rD   r  r    sS    
 R2TtQV9$/J
 'vZv[_`j[unt[uvELrE   c                 |    t        dddddddddt        t        d	      

      }t        dd| it        |fi |}|S )zf BEiT3 Giant model with patch size 14x14.
    Remapped to VisionTransformer with scale_norm=True.
    r  r  r  r   8mt@Tr   rJ   r  r  re  )beit3_giant_patch14_224r  r|  s       rD   r  r    sS    
 R2TtQV9$/J
 'vZv[_`j[unt[uvELrE   c                 ~    t        ddddddddddt        t        d	
            }t        dd| it        |fi |}|S )z} BEiT3 Giant model with patch size 14x14 and image size 336x336.
    Remapped to VisionTransformer with scale_norm=True.
    rS  r  r  r  r   r  Tr   rJ   r  )r   r   r   r   rY   rZ   r]   r^   r   r   rd   re  )beit3_giant_patch14_336r  r|  s       rD   r  r    sV    
 t2W]TtQV9$/J
 'vZv[_`j[unt[uvELrE   vit_tiny_patch16_224_in21kvit_small_patch32_224_in21kvit_small_patch16_224_in21kvit_base_patch32_224_in21kvit_base_patch16_224_in21kvit_base_patch8_224_in21kvit_large_patch32_224_in21kvit_large_patch16_224_in21kvit_huge_patch14_224_in21kvit_base_patch32_224_samzvit_base_patch32_224.samvit_base_patch16_224_samzvit_base_patch16_224.samvit_small_patch16_224_dinovit_small_patch8_224_dinovit_base_patch16_224_dinovit_base_patch8_224_dinovit_base_patch16_224_miil_in21k!vit_base_patch32_224_clip_laion2b)"vit_large_patch14_224_clip_laion2b!vit_huge_patch14_224_clip_laion2b"vit_giant_patch14_224_clip_laion2b)r   r)   Fr  )r   rX   )r  rX   )r)   r  r>  F)r   F)zvisual.)Fr>  T)FNr  (  rN   copyloggingr5  oscollectionsr   	functoolsr   typingr   r   r   r   r	   r
   r   r   r   r   ImportErrortyping_extensionsr?   torch.nnr=   torch.nn.functional
functionalr   	torch.jitr   	timm.datar   r   r   r   r   r   timm.layersr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   _builderr*   	_featuresr+   _manipulater,   r-   r.   r/   	_registryr0   r1   r2   __all__	getLoggerrK   _loggerr   r5   rV   r   r   r   rR   r  rO   rQ   r   r3   rL  rP   r  r  rH  r  no_gradrQ  r  r  r  r  r/  r;  rF  default_cfgsr  r  _quick_gelu_cfgsr  deepcopycr  environlowerrr  rv  rz  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rQ  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r  r  r  r
  r  r  r  r  r  r  r  r!  r$  r&  r)  r+  r-  r1  r4  r6  r8  r:  r<  rA  rF  rH  rJ  rL  rP  rS  rV  rX  rZ  r\  r_  ra  rg  rj  rl  ro  rr  rt  rv  ry  r|  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )r  rL  s   00rD   <module>rO     sA>  2    	 #  O O O*      
     * + + R R Y Y
 '

H
%F F:EBII EP:299 :zc299 cLH")) HZ !!"&+	<<   $	2g		 gT")) 3   # u W[ 0")) 3  (%s %u %x % "#"$&LL  c3h	
   \\2 WT* WTS WT# WTcg WTtx WT WTz  #ell*+# # # 
#u||
	#Lell*+  
#u||
.ell*+  
#u||
$9t 9~ #(&Jell*+J J  J 	J
 J 
#u||
JZc T#s(^  G 1$3G
 1$&G 02G 0 d2G 0 d]S2BG$ 1$ e3%G, 1$ e]S3B-G4 1$ e35G< 1$ e]S3B=GD 0 f2EGL 0 d]S2BMGT 0 e2UG\ 0 e]S2B]Gd / d1eGl 1$ e3mGt 1$ e]S3BuG@ .t}0AGF .t} 300GGN /~ 310OGZ ( d*[Gb ( d]S*BcGj ' d)kGr ' d]S)BsGz ' d){GB ' d]S)BCGL &t|MGN %drlOGP &t|QGR )$2,SGX &t(YG` &t(aGh ')iGp ')qGx &t(yGB ({e*-CGJ )$|e+-KGR )$|e+-SGZ (}e*-[Gb (|e*-cGj '{e)-kGr )$|e+-sG~ $THVZ&GD $THVZ&EGN !$i"(<!#MOGV  g"(<!"MWG^  e"(<!"M_Gf c"(<!!MgGr '\"(<! 3)0sG~ &t\"(<! 3(0GJ '\"(<! 3)0KGV '\"(<! 3)0WGf ,Ta"(<! 3.0gGr +Da"(<! 3-0sG~ ,Ta"(<! 3.0GJ ,Ta"(<! 3.0KGZ &t L|e:ch(j[Gb .t N|e:0WcGn ' L)oGt *4 4U,DuGz 24 441{G@ 24 484EAGF GGL 6t?84MGR 6t?S]8\SGX 6t?S]8\YG^ 6t?T8C_Gd 6t?(8DeGl 7$*@39PmGr 7$*@(9DsGz 6t?S8B{G@	 6t?(8DA	GJ	 5d?74K	GP	 5d?-87EQ	GX	 5d?T7CY	G^	 5d?-87E_	Gf	 6t?S8Bg	Gl	 6t?(8Dm	Gv	 0?24w	G|	 0?S2B}	GB
 0?(2DC
GJ
 1$$*@33PK
GP
 1$$*@(3DQ
GX
 0?S2BY
G^
 0?(2D_
Gh
 /?14i
Gn
 /?14o
Gt
 /?(1Du
G|
 0?S2B}
GD 1$?3GEGJ 24$*@3\a4cKGP 1$?Se3UQGX 0?2GYG^ 1$?Se3U_Gf (?*EgGl (?Sc*SmGr )$$*@3\_+asGx (?Sd*TyG~ )$?Sd+TGD ,T?Sd.TEGL .tW?0EMGT .t?Sc0SUGZ 3D? 3C5A[Gb /?Sc1ScGj +D?Sc-SkGp +D?C-AqGx +D?Sc-SyG~ ,T?Sc.SGF &t?Sc(SGGN ,T?Sc.SOGV 'W?Sc	)SWG` &tW?Sd	(TaGj &t?WD(BkGx /W?Sc	1SyGB /W?Sc	1SCGL 0W?Sc	2SMGV /W?Sd	1TWG` 4T?Sd6TaGh 3DW?Sd	5TiGr .tW?Sc	0SsG| .tW?Sc	0S}GF /W?Sc	1SGGR 'W?)ESGZ 'W?)E[Gb (W?Sc*ScGj (W?C	*AkGx *4B=[_+`yGz *4B=[_+`{G| +DRL}G~ +DRLG@ *4B<AGH 05? 3	20IGR 05? 3(	2DSG\ *45? 3	,0]Gf *45? 3(	,DgGr !$Sae 4#1sGz  Ygk 4"1{GB  Ygk 4"1CGL  Sae 4"1MGT Ygk 4!1UG\ Ygk 4!1]Gd !$[im 4U#DeGl  Zhl 4U"DmGv !$Sae 4#1wG~  Ygk 4"1GF  Ygk 4"1GGP "4Wei 4U$DQGX "4Wei 4U$DYGb ("cGd )$2,eGf ("gGj S"(<!	!MkGt  T"(<!	"MuG~ S"(<!	!MGJ *4M"(<!	,MKGT +DN"(<!	-MUG^ *4S 3"(<!,M_Gj ,TN"(<!	.MkGv +D -wG~ +D-GD (*EGJ +D -KGR ( *SGZ -d /[Gb +D -cGj ( *kGr +D -sGz ( *{GB ,T .CGJ )$ +KGR ,T .SGZ )$ +[Gb ,T .cGj -d/kGp *4,qGv -d /wG~ *4 ,GF *4 ,GGN -d /OGV / 1WG^ -d /_Gf -d /gGn / 1oGv / 1wG@ / 1AGH /1IGN ,T.OGT / 1UG\ ,T .]Gd 1$ 3eGl / 1mGt ,T .uG| / 1}GD ,T .EGL 0 2MGT -d /UG\ 0 2]Gd -d /eGl 0 2mGt 1$3uGz .t0{G@ 1$3AGF 02GGL 4T6MGR 5d7SGb 1$ 3cGj .t 30kGr .t 30sGz 1$ 33{GB 0 32CGJ 9$ 3;KGR 4T 36SGZ 4T 36[Gb 5d 37cGv 7 39wG~ 8 3:GF 0 32GGN 9$ 3;OGV 4T 36WG^ 4T 36_Gf 5d 37gGz 1$ 3{GB 3D 5CGJ 1$ 3KGR 1$ 3SGZ 3D 5[Gb 3D 5cGl 24 3(4mGt 6t 3(8uG~ 3D?5EGF 5d?7EGGN 3D?5EOGV 6t?8EWG` ,T 4.1aGf -d 4/1gGl 8 4:1mGr 0 421sGz / 411{G@ / 411AGF 8 4:1GGL / 411MGR 0 421SGZ ? 4A1[G` 9$ 4;1aGf 7 491gGn 1$ 431oGv ? 3A0wG| 0 421}GB ? 4A1CGH 9$ 4;1IGN 0 421OGT 7 491UG\ 1$ 431]Gd ? 3A0eGj .t 0"kGp =d 4?1qGv 5d 471wG~ =d 3?0GD  0 2"E GH  >t 3@0I GN  6t 380O GV  >t 3@0W G\  >t 3(@D] Gd  +D"(< 3A-e Gn  +D"(< 3A-o Gz  '?L!)%{ GB! ,T?L!.%C!GJ! &t?L!(%K!GR! $T?L!&%S!GZ! $T?L!&%[!Gb! '?L 3A)?c!Gj! ,T?L 3A.?k!Gr! &t?L 3A(?s!Gz! $T?L 3A&?{!GB" $T?L 3A&?C"GJ" '?L 3A)?K"GR" &t?L 3A(?S"GZ" $T?L 3A&?["Gb" $T?L 3A&?c"Gl" $ 41m"Gr" 4 41s"Gx" 4 41y"G~" 4 41"GF# +D"(<s-LG#GL# 4T"(<s6LM#GR# ,T"(<s.LS#GX# 5d"(<s7LY#G^# (*0Ds*T_#Gb# (=/DJ^il*nc#Gf#  "(<s"g#Gp# +/"(<s+
 #'"(<s#
 ,0"(<s,E$GR$ #/"4"4"6n$!Q!%%:LQ\`abi`jkl`mQmAn 	 >Al1o&A~  1+<=L8%789> %\2 jjnn%97CIIKvU  !%)### TN#
 k)*#L T @Q   T @Q   d AR   d AR   d AR   d AR   T @Q   T @Q   T @Q   T @Q   T @Q   D ?P   d AR   d AR   d AR   d AR   d AR   T @Q   d AR    DU   $ EV   4 FW   4 FW   4 FW   D GX    DU    DU    DU   $ EV   D GX   D GX   D GX   T HY   $ EV   $ EV   $ EV   $ EV   $ EV   $ EV   t J[   4 FW   4 FW   $ EV   $ EV   $ EV   
4 
FW 
 
 
d 
IZ 
 
 	D 	O` 	 	 	D 	O` 	 	 	T 	Pa 	 	 	T 	Pa 	 	 	D 	O` 	 	 	D 	O` 	 	 	 	Sd 	 	 $ EV   $ EV    DU   4 FW   	4 	FW 	 	 $ EV   d AR   d AR   t :K   d 9J   t :K   	 	CT 	 	 	 	DU 	 	 	 	CT 	 	  DU    CT    DU    DU    	d 	IZ 	 	 	T 	HY 	 	 	d 	IZ 	 	 d IZ    D GX   D GX   D GX   D GX   D GX   T HY   T HY   T HY   d IZ   d IZ   d IZ   d IZ   d IZ   d IZ    K\    K\    K\    K\    K\    K\    K\    L]    L]    L]   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   $ M^   D O`   D O`   T HY   d IZ    K\    K\    K\    K\    L]    L]    L]    L]    L]   d IZ    K\    K\    K\    L]    L]    L]   4 FW   
 
CT 
 
 t BS   
T 
@Q 
 
 
T 
@Q 
 
 
 
CT 
 
 
t 
BS 
 
 
T 
@Q 
 
 
T 
@Q 
 
 
 
CT 
 
 
t 
BS 
 
 
T 
@Q 
 
 
T 
@Q 
 
  4E   $ 5F   $ 5F   	$ 	5F 	 	 
t 
BS 
 
 
 
CT 
 
 
 
CT 
 
 
 
CT 
 
 H ' "E'!#G' "#G' !"E	'
 !"E'  !C' "#E' "#G' !"C'  :'  :' !">'  !<'  !<'  :'  &'H!'" ()L#'$ +O)L*N)' }G  *))*XQ os.   Az) NAz;N8Az;OAz;z)Az8z7Az8