
    kh                     h   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl
mZ d dlmZmZ d dlmZmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ dgZ d Z! G d dejD                        Z# G d dejD                        Z$ejJ                  dfde&de&deejD                     de'dejP                  f
dZ) G d dejD                        Z* G d dejD                        Z+ G d dejD                        Z, G d dejD                        Z- G d  d!ejD                        Z. G d" d#ejD                        Z/ G d$ d%ejD                        Z0 G d& d'ejD                        Z1 G d( d)ejD                        Z2 G d* dejD                        Z3dWd+Z4 ei d, e4d-.      d/ e4d-.      d0 e4d-.      d1 e4d-.      d2 e4d-.      d3 e4d-.      d4 e4d-d56      d7 e4d-.      d8 e4d-.      d9 e4d-.      d: e4d-.      d; e4d-.      d< e4d-.      d= e4d-d56      d> e4d?d@d5dAdBdCD      dE e4dFdGd5dAdBdCD      dH e4dIdJd5dAdBdCD            Z5dK Z6dXdLZ7edXdM       Z8edXdN       Z9edXdO       Z:edXdP       Z;edXdQ       Z<edXdR       Z=edXdS       Z>edXdT       Z?edXdU       Z@edXdV       ZAy)Y    N)partial)ListOptionalTupleTypeUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathtrunc_normal_create_conv2dConvNormActSqueezeExciteuse_fused_attnClassifierHead   )build_model_with_cfg)feature_take_indices)checkpoint_seq)register_modelgenerate_default_cfgsFastVitc                 &    | sy|| z  dk(  sJ || z  S )Nr   r    )
group_sizechannelss     O/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/fastvit.py
num_groupsr      s(     *$))):%%    c                       e Zd ZdZddddddddej
                  f	dededed	ed
edededededededeej                     ddf fdZ
dej                  dej                  fdZd Zdeej                  ej                  f   fdZdeej$                  ej&                  f   deej                  ej                  f   fdZ xZS )MobileOneBlocka#  MobileOne building block.

    This block has a multi-branched architecture at train-time
    and plain-CNN style architecture at inference time
    For more details, please refer to our paper:
    `An Improved One millisecond Mobile Backbone` -
    https://arxiv.org/pdf/2206.04040.pdf
    r   r   FTin_chsout_chskernel_sizestridedilationr   inference_modeuse_seuse_actuse_scale_branchnum_conv_branches	act_layerreturnNc                    t         t        |           || _        t	        ||      | _        || _        || _        || _        || _	        || _
        || _        |rt        |d      nt        j                         | _        |r"t!        |||||| j
                  d      | _        nd| _        ||k(  r|dk(  rt        j$                  |      nd| _        |dkD  rst        j(                  t+        | j                        D cg c];  }t-        | j                  | j                  || j                  | j
                  d	      = c}      | _        nd| _        d| _        |dkD  r@|
r>t-        | j                  | j                  d| j                  | j
                  d	      | _        |	r |       | _        yt        j                         | _        yc c}w )
a  Construct a MobileOneBlock module.

        Args:
            in_chs: Number of channels in the input.
            out_chs: Number of channels produced by the block.
            kernel_size: Size of the convolution kernel.
            stride: Stride size.
            dilation: Kernel dilation factor.
            group_size: Convolution group size.
            inference_mode: If True, instantiates model in inference mode.
            use_se: Whether to use SE-ReLU activations.
            use_act: Whether to use activation. Default: ``True``
            use_scale_branch: Whether to use scale branch. Default: ``True``
            num_conv_branches: Number of linear conv branches.
        r   )
rd_divisorTr%   r&   r'   groupsbiasN)num_featuresr   Fr%   r&   r2   	apply_act)superr"   __init__r(   r   r2   r&   r'   r%   r#   r$   r,   r   nnIdentityser   reparam_convBatchNorm2didentity
ModuleListranger   conv_kxk
conv_scaleact)selfr#   r$   r%   r&   r'   r   r(   r)   r*   r+   r,   r-   _	__class__s                 r   r8   zMobileOneBlock.__init__,   s   < 	nd,., V4 &!2 ;A-A6bkkm -'!{{!D !%D f$1 F3 M !1$ "  %T%;%;<	/   $/#{{#{{"'	/ 	! !% #DOQ#3"-KKLL !;;;;## #*9;r{{}3	/s   ;A Gxc                    | j                   /| j                  | j                  | j                  |                  S d}| j                  | j                  |      }d}| j                  | j	                  |      }||z   }| j
                  | j
                  D ]  }| ||      z  } | j                  | j                  |            S )zApply forward pass.r   )r<   rC   r;   r>   rB   rA   )rD   rG   identity_out	scale_outoutrcs         r   forwardzMobileOneBlock.forward   s     (88DGGD$5$5a$89:: ==$==+L 	??&*I ,&==$mm r!u xx%%r    c           	      <   | j                   y| j                         \  }}t        | j                  | j                  | j
                  | j                  | j                  | j                  d      | _         || j                   j                  _
        || j                   j                  _
        | j                         D ]  \  }}d|v r|j                           | j                  d       | j                  d       t        | d      r| j                  d       d| _        y)a  Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
        https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
        architecture used at training time to obtain a plain CNN-like structure
        for inference.
        NT)in_channelsout_channelsr%   r&   r'   r2   r3   r<   rA   rB   r>   )r<   _get_kernel_biasr   r#   r$   r%   r&   r'   r2   weightdatar3   named_parametersdetach___delattr__hasattrr(   )rD   kernelr3   nameparas        r   reparameterizezMobileOneBlock.reparameterize   s     (,,.)((;;]];;
 )/  %&*# //1 	JD$%LLN	
 	$&4$Z("r    c                    d}d}| j                   [| j                  | j                         \  }}| j                  dz  }t        j                  j
                  j                  |||||g      }d}d}| j                  | j                  | j                        \  }}d}d}| j                  Et        | j                        D ]-  }| j                  | j                  |         \  }	}
||	z  }||
z  }/ ||z   |z   }||z   |z   }||fS )zMethod to obtain re-parameterized kernel and bias.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L83

        Returns:
            Tuple of (kernel, bias) after fusing branches.
        r      )rB   _fuse_bn_tensorr%   torchr9   
functionalpadr>   rA   r@   r,   )rD   kernel_scale
bias_scalera   kernel_identitybias_identitykernel_conv	bias_convix_kernel_biaskernel_final
bias_finals                r   rQ   zMobileOneBlock._get_kernel_bias   s"    
??&'+';';DOO'L$L*""a'C 88..22<#sCQTAUVL ==$-1-A-A$---P*O] 	==$D223 #!%!5!5dmmB6G!Hw&U"	#
 #\1OC+m;
Z''r    branchc                    t        |t              r|j                  j                  }|j                  j
                  }|j                  j                  }|j                  j                  }|j                  j                  }|j                  j                  }n2t        |t        j                        sJ t        | d      s| j                  | j                  z  }t        j                  | j                  || j                   | j                   f|j                  j"                  |j                  j$                        }	t'        | j                        D ](  }
d|	|
|
|z  | j                   dz  | j                   dz  f<   * |	| _        | j(                  }|j
                  }|j                  }|j                  }|j                  }|j                  }||z   j+                         }||z  j-                  dddd      }||z  |||z  |z  z
  fS )a  Method to fuse batchnorm layer with preceding conv layer.
        Reference: https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95

        Args:
            branch: Sequence of ops to be fused.

        Returns:
            Tuple of (kernel, bias) after fusing batchnorm.
        	id_tensordtypedevicer   r]   )
isinstancer   convrR   bnrunning_meanrunning_varr3   epsr9   r=   rW   r#   r2   r_   zerosr%   rq   rr   r@   ro   sqrtreshape)rD   rm   rX   rw   rx   gammabetary   	input_dimkernel_valueistdts                r   r^   zMobileOneBlock._fuse_bn_tensor   s    fk*[[''F!9911L ))//KII$$E99>>D))--Cfbnn5554- KK4;;6	${{[[)T-=-=t?O?OP ----!==// 
 t{{+ A  !1y=$*:*:a*?AQAQUVAVV ".^^F!..L ,,KMME;;D**CS &&(S[!!"aA.z4,"6"<<<<r    )__name__
__module____qualname____doc__r9   GELUintboolr   Moduler8   r_   TensorrM   r[   r   rQ   r   
Sequentialr=   r^   __classcell__rF   s   @r   r"   r"   "   s>    #(  %)%&)+Z=Z= Z= 	Z=
 Z= Z= Z= !Z= Z= Z= #Z=  #Z= BIIZ= 
Z=x& &%,, &2!#F!(%ell(B"C !(F)=BMM2>>9:)=	u||U\\)	*)=r    r"   c                   l    e Zd ZdZ	 	 	 	 ddedededededee   d	ed
eej                     deddf fdZ	de
j                  de
j                  fdZdee
j                  e
j                  f   fdZddZedej"                  dej$                  dee
j                  e
j                  f   fd       Z xZS )ReparamLargeKernelConvzBuilding Block of RepLKNet

    This class defines overparameterized large kernel conv block
    introduced in `RepLKNet <https://arxiv.org/abs/2203.06717>`_

    Reference: https://github.com/DingXiaoH/RepLKNet-pytorch
    Nr#   r$   r%   r&   r   small_kernelr)   r-   r(   r.   c
           	      L   t         t        |           || _        t	        ||      | _        || _        || _        || _        || _	        |	r"t        ||||d| j
                  d      | _        nid| _        t        |||| j                  | j
                  d      | _        |6||k  sJ d       t        |||| j                  | j
                  d      | _        |rt        |d	      nt!        j"                         | _        | |       | _        yt!        j"                         | _        y)
a!  Construct a ReparamLargeKernelConv module.

        Args:
            in_chs: Number of input channels.
            out_chs: Number of output channels.
            kernel_size: Kernel size of the large kernel conv branch.
            stride: Stride size. Default: 1
            group_size: Group size. Default: 1
            small_kernel: Kernel size of small kernel conv branch.
            act_layer: Activation module. Default: ``nn.GELU``
            inference_mode: If True, instantiates model in inference mode. Default: ``False``
        r   Tr1   NFr5   zDThe kernel size for re-param cannot be larger than the large kernel!g      ?)rd_ratio)r7   r   r8   r&   r   r2   r#   r$   r%   r   r   r<   r   
large_conv
small_convr   r9   r:   r;   rC   )rD   r#   r$   r%   r&   r   r   r)   r-   r(   rF   s             r   r8   zReparamLargeKernelConv.__init__  s   0 	$d46 V4&( -'{{!D !%D)'{{{{DO ' K/ZYZ/"- ,;;;;## <B-$7r{{}"+"79;R[[]r    rG   c                     | j                   | j                  |      }n1| j                  |      }| j                  || j                  |      z   }| j                  |      }| j	                  |      }|S N)r<   r   r   r;   rC   )rD   rG   rK   s      r   rM   zReparamLargeKernelConv.forward`  sh    (##A&C//!$C*DOOA..ggclhhsm
r    c                    | j                  | j                  j                  | j                  j                        \  }}t	        | d      r| j                  | j
                  j                  | j
                  j                        \  }}||z  }|t        j                  j                  || j                  | j                  z
  dz  gdz        z  }||fS )zMethod to obtain re-parameterized kernel and bias.
        Reference: https://github.com/DingXiaoH/RepLKNet-pytorch

        Returns:
            Tuple of (kernel, bias) after fusing branches.
        r   r]      )_fuse_bnr   ru   rv   rW   r   r9   r`   ra   r%   r   )rD   eq_keq_bsmall_ksmall_bs        r   get_kernel_biasz&ReparamLargeKernelConv.get_kernel_biask  s     ]]4??#7#79K9KL
d4&#}}T__-A-A4??CUCUVGWGODBMM%%4++d.?.??AEFJ D Tzr    c                    | j                         \  }}t        | j                  | j                  | j                  | j
                  | j                  d      | _        || j                  j                  _	        || j                  j                  _	        | j                  d       t        | d      r| j                  d       yy)a  
        Following works like `RepVGG: Making VGG-style ConvNets Great Again` -
        https://arxiv.org/pdf/2101.03697.pdf. We re-parameterize multi-branched
        architecture used at training time to obtain a plain CNN-like structure
        for inference.
        Tr%   r&   r2   r3   r   r   N)r   r   r#   r$   r%   r&   r2   r<   rR   rS   r3   rV   rW   )rD   r   r   s      r   r[   z%ReparamLargeKernelConv.reparameterize{  s     ))+
d)KKLL((;;;;
 )-  %&*#&4&\* 'r    ru   rv   c                    | j                   }|j                  }|j                  }|j                   }|j                  }|j                  }||z   j                         }||z  j                  dddd      }	||	z  |||z  |z  z
  fS )zMethod to fuse batchnorm layer with conv layer.

        Args:
            conv: Convolutional kernel weights.
            bn: Batchnorm 2d layer.

        Returns:
            Tuple of (kernel, bias) after fusing batchnorm.
        rs   r   )rR   rw   rx   r3   ry   r{   r|   )
ru   rv   rX   rw   rx   r}   r~   ry   r   r   s
             r   r   zReparamLargeKernelConv._fuse_bn  s     nn		wwffS &&(S[!!"aA.z4,"6"<<<<r    )NFNFr.   N)r   r   r   r   r   r   r   r9   r   r8   r_   r   rM   r   r   r[   staticmethodConv2dr=   r   r   r   s   @r   r   r     s%    +/ -1#(BKBK BK 	BK
 BK BK #3-BK BK  		*BK !BK 
BKH	 	%,, 	u||U\\'A!B  +. =ii=^^=	u||U\\)	*= =r    r   Fr#   r$   r-   r(   r.   c                     t        j                  t        | |dd||      t        ||ddd||      t        ||dd||            S )a,  Build convolutional stem with MobileOne blocks.

    Args:
        in_chs: Number of input channels.
        out_chs: Number of output channels.
        inference_mode: Flag to instantiate model in inference mode. Default: ``False``

    Returns:
        nn.Sequential object with stem elements.
       r]   )r#   r$   r%   r&   r-   r(   r   )r#   r$   r%   r&   r   r-   r(   )r9   r   r"   )r#   r$   r-   r(   s       r   convolutional_stemr     sj      ==)	
 	)	
 	)	
% r    c                        e Zd ZU dZej
                  j                  e   ed<   	 	 	 	 dde	de	dede
de
dd	f fd
Zdej                  dej                  fdZ xZS )	AttentionzMulti-headed Self Attention module.

    Source modified from:
    https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py
    
fused_attndimhead_dimqkv_bias	attn_drop	proj_dropr.   Nc                 r   t         |           ||z  dk(  sJ d       || _        ||z  | _        |dz  | _        t               | _        t        j                  ||dz  |      | _	        t        j                  |      | _        t        j                  ||      | _        t        j                  |      | _        y)a}  Build MHSA module that can handle 3D or 4D input tensors.

        Args:
            dim: Number of embedding dimensions.
            head_dim: Number of hidden dimensions per head. Default: ``32``
            qkv_bias: Use bias or not. Default: ``False``
            attn_drop: Dropout rate for attention tensor.
            proj_drop: Dropout rate for projection tensor.
        r   z#dim should be divisible by head_dimg      r   )r3   N)r7   r8   r   	num_headsscaler   r   r9   LinearqkvDropoutr   projr   )rD   r   r   r   r   r   rF   s         r   r8   zAttention.__init__  s    " 	X~"I$II" %
(*99S#'9I.IIc3'	I.r    rG   c                 V   |j                   \  }}}}||z  }|j                  d      j                  dd      }| j                  |      j	                  ||d| j
                  | j                        j                  ddddd      }|j                  d      \  }}	}
| j                  rPt        j                  j                  j                  ||	|
| j                  r| j                  j                   nd	      }nL|| j"                  z  }||	j                  dd      z  }|j%                  d
      }| j                  |      }||
z  }|j                  dd      j	                  |||      }| j'                  |      }| j)                  |      }|j                  dd      j	                  ||||      }|S )Nr]   rs   r   r   r   r           )	dropout_pr   )shapeflatten	transposer   r|   r   r   permuteunbindr   r_   r9   r`   scaled_dot_product_attentiontrainingr   pr   softmaxr   r   )rD   rG   BCHWNr   qkvattns               r   rM   zAttention.forward  su   WW
1aEIIaL""2r*HHQKWQ1dnndmm<WQ1a# 	
 **Q-1a??##@@1a.2mm$..** A A
 DJJAq{{2r**D<<B<'D>>$'DqAKK1%%aA.IIaLNN1KKB''1a3r    )    Fr   r   )r   r   r   r   r_   jitFinalr   __annotations__r   floatr8   r   rM   r   r   s   @r   r   r     s    
 		%%
 """// / 	/
 / / 
/: %,, r    r   c                        e Zd ZdZej
                  dddfdededededeej                     de	d	e	d
e	ddf fdZ
dej                  dej                  fdZ xZS )
PatchEmbedz$Convolutional patch embedding layer.F
patch_sizer&   r#   	embed_dimr-   lkc_use_actr)   r(   r.   Nc	                     t         	|           t        j                  t	        ||||dd||r|nd|	      t        ||ddd||            | _        y)a{  Build patch embedding layer.

        Args:
            patch_size: Patch size for embedding computation.
            stride: Stride for convolutional embedding layer.
            in_chs: Number of channels of input tensor.
            embed_dim: Number of embedding dimensions.
            inference_mode: Flag to instantiate model in inference mode. Default: ``False``
        r   r   N)	r#   r$   r%   r&   r   r   r)   r-   r(   F)r#   r$   r%   r&   r)   r-   r(   )r7   r8   r9   r   r   r"   r   )
rD   r   r&   r#   r   r-   r   r)   r(   rF   s
            r   r8   zPatchEmbed.__init__  sf    ( 	MM"!&'2)-
  !#-
	r    rG   c                 (    | j                  |      }|S r   )r   rD   rG   s     r   rM   zPatchEmbed.forwardJ  s    IIaLr    )r   r   r   r   r9   r   r   r   r   r   r8   r_   r   rM   r   r   s   @r   r   r     s    . *, % #(*
*
 *
 	*

 *
 BII*
 *
 *
 !*
 
*
X %,, r    r   c                   &     e Zd Zd fd	Zd Z xZS )LayerScale2dc                     t         |           || _        t        j                  |t        j                  |dd      z        | _        y )Nr   )r7   r8   inplacer9   	Parameterr_   onesr}   )rD   r   init_valuesr   rF   s       r   r8   zLayerScale2d.__init__P  s7    \\+

310E"EF
r    c                 n    | j                   r|j                  | j                        S || j                  z  S r   )r   mul_r}   r   s     r   rM   zLayerScale2d.forwardU  s(    %)\\qvvdjj!Eq4::~Er    )h㈵>F)r   r   r   r8   rM   r   r   s   @r   r   r   O  s    G
Fr    r   c                   p     e Zd ZdZ	 	 	 ddef fdZdej                  dej                  fdZd	dZ	 xZ
S )
RepMixerzReparameterizable token mixer.

    For more details, please refer to our paper:
    `FastViT: A Fast Hybrid Vision Transformer using Structural Reparameterization <https://arxiv.org/pdf/2303.14189.pdf>`_
    r(   c           	         t         |           || _        || _        || _        |rWt        j                  | j                  | j                  | j                  d| j                  dz  | j                  d      | _        yd| _        t        |||dddd      | _	        t        |||dd	      | _
        |t        ||      | _        yt        j                         | _        y)
a  Build RepMixer Module.

        Args:
            dim: Input feature map dimension. :math:`C_{in}` from an expected input of size :math:`(B, C_{in}, H, W)`.
            kernel_size: Kernel size for spatial mixing. Default: 3
            layer_scale_init_value: Initial value for layer scale. Default: 1e-5
            inference_mode: If True, instantiates model in inference mode. Default: ``False``
        r   r]   Tr%   r&   paddingr2   r3   NFr   )r   r*   r+   r,   )r   r*   )r7   r8   r   r%   r(   r9   r   r<   r"   normmixerr   layer_scaler:   )rD   r   r%   layer_scale_init_valuer(   rF   s        r   r8   zRepMixer.__init__`  s     	&, "		 ,,((A-xx!D !%D&!&"#DI (DJ &1#/5K#L #%;;= r    rG   r.   c                     | j                   | j                  |      }|S || j                  | j                  |      | j                  |      z
        z   }|S r   )r<   r   r   r   r   s     r   rM   zRepMixer.forward  sV    (!!!$A  D$$TZZ]TYYq\%ABBAr    c                 h   | j                   ry| j                  j                          | j                  j                          t	        | j
                  t              r| j                  j                  | j
                  j                  j                  d      | j                  j                  j                  | j                  j                  j                  z
  z  z   }t        j                  | j
                  j                        | j                  j                  j                  | j                  j                  j                  z
  z  }n| j                  j                  | j                  j                  j                  z   | j                  j                  j                  z
  }| j                  j                  j                  | j                  j                  j                  z
  }t        | j                   | j                   | j"                  d| j                   d      | _
        || j                  j                  _        || j                  j                  _        | j'                         D ]  \  }}d|v r|j)                           | j+                  d       | j+                  d       | j+                  d	       y)
ziReparameterize mixer and norm into a single
        convolutional layer for efficient inference.
        Nrs   r   Tr   r<   r   r   r   )r(   r   r[   r   rt   r   r   ro   r}   	unsqueezer<   rR   r_   squeezer3   r   r   r%   rS   rT   rU   rV   )rD   wbrY   rZ   s        r   r[   zRepMixer.reparameterize  s
    

!!#		  "d&&5

$$t'7'7'='='G'G'K

''..1G1G1N1NN( A d..445

'',,tyy/E/E/J/JJA
 

$$**))001))((//0 
 

'',,tyy/E/E/J/JJA)HHHH((88
 )*  %&'#//1 	JD$%LLN	 	! 'r    )r   r   Fr   )r   r   r   r   r   r8   r_   r   rM   r[   r   r   s   @r   r   r   Y  sC     #'#(31
 !31j %,, *(r    r   c                        e Zd ZdZddej
                  dfdedee   dee   deej                     de
d	df fd
Zdej                  d	dfdZdej                  d	ej                  fdZ xZS )ConvMlpzConvolutional FFN Module.Nr   r#   hidden_channelsr$   r-   dropr.   c                 Z   t         |           |xs |}|xs |}t        ||d|d      | _        t	        j
                  ||d      | _         |       | _        t	        j
                  ||d      | _        t	        j                  |      | _
        | j                  | j                         y)a_  Build convolutional FFN module.

        Args:
            in_chs: Number of input channels.
            hidden_channels: Number of channels after expansion. Default: None
            out_chs: Number of output channels. Default: None
            act_layer: Activation layer. Default: ``GELU``
            drop: Dropout rate. Default: ``0.0``.
           F)r%   r2   r6   r   )r%   N)r7   r8   r   ru   r9   r   fc1rC   fc2r   r   apply_init_weights)rD   r#   r   r$   r-   r   rF   s         r   r8   zConvMlp.__init__  s    " 	#V)3V
	 99V_!D;99_g1EJJt$	

4%%&r    mc                     t        |t        j                        rOt        |j                  d       |j
                  +t        j                  j                  |j
                  d       y y y )N{Gz?r   r   )rt   r9   r   r   rR   r3   init	constant_rD   r   s     r   r   zConvMlp._init_weights  sJ    a#!((-vv!!!!&&!, " $r    rG   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }|S r   )ru   r   rC   r   r   r   s     r   rM   zConvMlp.forward  sU    IIaLHHQKHHQKIIaLHHQKIIaLr    )r   r   r   r   r9   r   r   r   r   r   r   r8   r   r_   r   rM   r   r   s   @r   r   r     s    #
 .2%))+'' &c]' c]	'
 BII' ' 
'B-ryy -T - %,, r    r   c                        e Zd ZdZ	 	 	 ddedee   deeeeef   f   ddf fdZde	j                  de	j                  fd	Zdd
Z xZS )RepConditionalPosEnca"  Implementation of conditional positional encoding.

    For more details refer to paper:
    `Conditional Positional Encodings for Vision Transformers <https://arxiv.org/pdf/2102.10882.pdf>`_

    In our implementation, we can reparameterize this module to eliminate a skip connection.
    Nr   dim_outspatial_shaper.   c           
         t         t        |           t        |t              rt        |gdz        }t        |t              sJ dt        |       d       t        |      dk(  sJ dt        |       d       || _	        || _
        |xs || _        || _        |rPt        j                  | j                  | j                  | j                  d|d   dz  | j                  d      | _        y	d	| _        t        j                  | j                  | j                  |dt	        |d   dz        | j                  d
      | _        y	)at  Build reparameterizable conditional positional encoding

        Args:
            dim: Number of input channels.
            dim_out: Number of embedding dimensions. Default: 768
            spatial_shape: Spatial shape of kernel for positional encoding. Default: (7, 7)
            inference_mode: Flag to instantiate block in inference mode. Default: ``False``
        r]   z/"spatial_shape" must by a sequence or int, get z	 instead.z+Length of "spatial_shape" should be 2, got r   r   Tr   N)r2   r3   )r7   r	  r8   rt   r   tupler   typelenr  r   r
  r2   r9   r   r<   pos_enc)rD   r   r
  r  r(   rF   s        r   r8   zRepConditionalPosEnc.__init__  sA    	"D24mS)!=/A"56M-/ 	
&'y2	
/ =!Q& 	
}%&i1	
&
 +~# "		 ..%a(A-{{!D !%D99M!$)*{{DLr    rG   c                 l    | j                   | j                  |      }|S | j                  |      |z   }|S r   )r<   r  r   s     r   rM   zRepConditionalPosEnc.forward<  s>    (!!!$A  Q!#Ar    c           
         | j                   | j                  z  }t        j                  | j                   || j                  d   | j                  d   f| j
                  j                  j                  | j
                  j                  j                        }t        | j                         D ].  }d||||z  | j                  d   dz  | j                  d   dz  f<   0 |}|| j
                  j                  z   }| j
                  j                  }t        j                  | j                   | j                  | j                  dt        | j                  d   dz        | j                  d      | _        || j                  j                  _        || j                  j                  _        | j#                         D ]  \  }}d|v r|j%                           | j'                  d       y )	Nr   r   rp   r]   Tr   r<   r  )r   r2   r_   rz   r  r  rR   rq   rr   r@   r3   r9   r   r
  r   r<   rS   rT   rU   rV   )	rD   r   r   r   ro   w_finalb_finalrY   rZ   s	            r   r[   z#RepConditionalPosEnc.reparameterizeC  s   HH+	{{""1%""1%	 ,,%%++<<&&--	
 txx 	A  I""1%*""1%*,	 !	 dll111,,## IIHHLL****1-23;;
 )0  %&-#//1 	JD$%LLN	 	#r    )Nr   r   Fr   )r   r   r   r   r   r   r   r   r8   r_   r   rM   r[   r   r   s   @r   r	  r	    su     &*9? 44 c]4 !eCHo!56	4 
4l %,, +$r    r	  c                        e Zd ZdZddej
                  ddddfdeded	ed
eej                     dededede
f fdZd Z xZS )RepMixerBlockzImplementation of Metaformer block with RepMixer as token mixer.

    For more details on Metaformer structure, please refer to:
    `MetaFormer Is Actually What You Need for Vision <https://arxiv.org/pdf/2111.11418.pdf>`_
    r         @r   r   Fr   r%   	mlp_ratior-   r   	drop_pathr   r(   c	                 B   t         	|           t        ||||      | _        t	        |t        ||z        ||      | _        |t        ||      | _        nt        j                         | _        |dkD  rt        |      | _        yt        j                         | _        y)a,  Build RepMixer Block.

        Args:
            dim: Number of embedding dimensions.
            kernel_size: Kernel size for repmixer. Default: 3
            mlp_ratio: MLP expansion ratio. Default: 4.0
            act_layer: Activation layer. Default: ``nn.GELU``
            proj_drop: Dropout rate. Default: 0.0
            drop_path: Drop path rate. Default: 0.0
            layer_scale_init_value: Layer scale value at initialization. Default: 1e-5
            inference_mode: Flag to instantiate block in inference mode. Default: ``False``
        )r%   r   r(   r#   r   r-   r   Nr   )r7   r8   r   token_mixerr   r   mlpr   r   r9   r:   r   r  )
rD   r   r%   r  r-   r   r  r   r(   rF   s
            r   r8   zRepMixerBlock.__init__x  s    0 	###9)	
 i0	
 "-+C1GHD!{{}D09C),R[[]r    c                     | j                  |      }|| j                  | j                  | j                  |                  z   }|S r   )r  r  r   r  r   s     r   rM   zRepMixerBlock.forward  s=    Qt//<==r    )r   r   r   r   r9   r   r   r   r   r   r   r8   rM   r   r   s   @r   r  r  q  s      !")+"",0#(+S+S +S 	+S
 BII+S +S +S %*+S !+SZr    r  c                        e Zd ZdZdej
                  ej                  dddfdedede	ej                     de	ej                     d	ed
edef fdZd Z xZS )AttentionBlockzImplementation of metaformer block with MHSA as token mixer.

    For more details on Metaformer structure, please refer to:
    `MetaFormer Is Actually What You Need for Vision <https://arxiv.org/pdf/2111.11418.pdf>`_
    r  r   r   r   r  r-   
norm_layerr   r  r   c                    t         |            ||      | _        t        |      | _        |t        ||      | _        nt        j                         | _        |dkD  rt        |      nt        j                         | _
        t        |t        ||z        ||      | _        |t        ||      | _        nt        j                         | _        |dkD  rt        |      | _        yt        j                         | _        y)a  Build Attention Block.

        Args:
            dim: Number of embedding dimensions.
            mlp_ratio: MLP expansion ratio. Default: 4.0
            act_layer: Activation layer. Default: ``nn.GELU``
            norm_layer: Normalization layer. Default: ``nn.BatchNorm2d``
            proj_drop: Dropout rate. Default: 0.0
            drop_path: Drop path rate. Default: 0.0
            layer_scale_init_value: Layer scale value at initialization. Default: 1e-5
        r   Nr   r  )r7   r8   r   r   r  r   layer_scale_1r9   r:   r   
drop_path1r   r   r  layer_scale_2
drop_path2)	rD   r   r  r-   r"  r   r  r   rF   s	           r   r8   zAttentionBlock.__init__  s    , 	sO	$-!-!-c3I!JD!#D1:S(9-bkkmi0	
 "-!-c3I!JD!#D1:S(9-bkkmr    c           
          || j                  | j                  | j                  | j                  |                        z   }|| j	                  | j                  | j                  |                  z   }|S r   )r%  r$  r  r   r'  r&  r  r   s     r   rM   zAttentionBlock.forward  s^     2 243C3CDIIaL3Q RSS 2 2488A; ?@@r    )r   r   r   r   r9   r   r=   r   r   r   r   r8   rM   r   r   s   @r   r!  r!    s      #)+*,.."",0*T*T *T BII	*T
 RYY*T *T *T %**TXr    r!  c            "            e Zd Zdddddddej                  ej
                  ddd	ddfd
edededededededede	ej                     dededeej                     deej                     dedede	e   f  fdZd Z xZS )FastVitStageTFr   r]   Nr   r  r   r   r   r
  depthtoken_mixer_type
downsamplese_downsampledown_patch_sizedown_stridepos_emb_layerr%   r  r-   r"  proj_drop_ratedrop_path_rater   c                 *   t         |           d| _        |rt        ||||||||      | _        n ||k(  sJ t        j                         | _        |	 |	||      | _        nt        j                         | _        g }t        |      D ]p  }|dk(  r&|j                  t        ||
|||||   ||             .|dk(  r%|j                  t        |||||||   |             Xt        d	j                  |             t        j                  | | _        y)
aQ  FastViT stage.

        Args:
            dim: Number of embedding dimensions.
            depth: Number of blocks in stage
            token_mixer_type: Token mixer type.
            kernel_size: Kernel size for repmixer.
            mlp_ratio: MLP expansion ratio.
            act_layer: Activation layer.
            norm_layer: Normalization layer.
            proj_drop_rate: Dropout rate.
            drop_path_rate: Drop path rate.
            layer_scale_init_value: Layer scale value at initialization.
            inference_mode: Flag to instantiate block in inference mode.
        F)r   r&   r#   r   r)   r-   r   r(   N)r(   repmixer)r%   r  r-   r   r  r   r(   	attention)r  r-   r"  r   r  r   z"Token mixer type: {} not supported)r7   r8   grad_checkpointingr   r-  r9   r:   pos_embr@   appendr  r!  
ValueErrorformatr   blocks)rD   r   r
  r+  r,  r-  r.  r/  r0  r1  r%   r  r-   r"  r2  r3  r   r   r(   r<  	block_idxrF   s                        r   r8   zFastVitStage.__init__  s3   H 	"'(*"!$#'-	DO '>!> kkmDO$(PDL;;=DLu 	I:-m +'',,Y7+A#1	 	 "[0n''),,Y7+A  !8??@PQ /	4 mmV,r    c                     | j                  |      }| j                  |      }| j                  r6t        j                  j                         st        | j                  |      }|S | j                  |      }|S r   )r-  r8  r7  r_   r   is_scriptingr   r<  r   s     r   rM   zFastVitStage.forward=  s`    OOALLO""599+A+A+Ct{{A.A  AAr    )r   r   r   r9   r   r=   r   strr   r   r   r   r   r8   rM   r   r   s   @r   r*  r*    s     $"'#$ 15 ")+*,..$'$'6: 'V-V- V- 	V-
 "V- V-  V- !V- V- $BII.V- V- V- BIIV- RYYV- "V-  "!V-" %-UO#V-pr    r*  c            1       Z    e Zd ZU ej                  j
                  e   ed<   	 ddddddddd	d
ddddddddddej                  ej                  dfdedeedf   deedf   deedf   deedf   deedf   deedf   dededeeej                      df   dededed ed!ed"ed#eded$ed%ed&eej                      d'eej                      d(ed)d*f0 fd+Zd,ej                   d)d*fd-Zej                  j(                  d.        Zej                  j(                  dAd/       Zej                  j(                  dBd0       Zej                  j(                  d)ej                   fd1       ZdCded%ee   fd2Z	 	 	 	 	 dDd3ej4                  d4eeeee   f      d5ed6ed7ed8ed)eeej4                     eej4                  eej4                     f   f   fd9Z	 	 	 dEd4eeee   f   d:ed;efd<Zd3ej4                  d)ej4                  fd=ZdAd3ej4                  d>efd?Z d3ej4                  d)ej4                  fd@Z! xZ"S )Fr   	fork_featr   r]   r]      r]   r5  r5  r5  r5  @            r   r   r   r   )FTTT)FFFF  )NNNNr   r]   r   r   Fg       @avgin_chanslayers.token_mixers
embed_dims
mlp_ratiosdownsamplesse_downsamplesrepmixer_kernel_sizenum_classespos_embsr/  r0  	drop_rater2  r3  r   r   	cls_ratioglobal_poolr"  r-   r(   r.   Nc                 2   t         %|           |rdn|	| _        || _        || _        g | _        t        ||d   ||      | _        |d   }d}t        j                  d|t        |            j                  |      D cg c]  }|j                          }}g }t        t        |            D ]  }||   xs |||   k7  }t        d!i d|d||   d||   d|d||   d|d	|d
|
|   d||   d|d||   d|d|d|d||   d|d|d|}|j!                  |       ||   }|r|dz  }| xj
                  t#        |d|z  d|       gz  c_         t%        j&                  | | _        t        | j(                        | _        |x| _        | _        | j                  rg d| _        t3        | j0                        D ]b  \  } }!| dk(  r6t4        j6                  j9                  dd       r	 t%        j:                         }"n |||          }"d|! }#| j=                  |#|"       d nOt?        |d   |z        x| _        x| _        }$tA        |d   |$ddd|d|d	      | _!        tE        |$|	||       | _#        | jI                  | jJ                         y c c}w )"Nr   r   r   r
  r+  r-  r.  r/  r0  r1  r,  r%   r  r-   r"  r2  r3  r   r   r(   r]   r   stages.)num_chs	reductionmoduler   r   r]   r   
FORK_LAST3r   rs   r   T)	r#   r$   r%   r&   r   r(   r)   r-   r,   )	pool_typerX  r   )&r7   r8   rV  rB  rZ  feature_infor   stemr_   linspacesumsplittolistr@   r  r*  r9  dictr9   r   stages
num_stagesr4   head_hidden_sizeout_indices	enumerateosenvirongetr:   
add_moduler   r"   
final_convr   headr   r   )&rD   rN  rO  rP  rQ  rR  rS  rT  rU  rV  rW  r/  r0  rX  r2  r3  r   r   rB  rY  rZ  r"  r-   r(   prev_dimr   rG   dprrj  r   r-  stagei_embi_layerlayer
layer_namefinal_featuresrF   s&                                        r   r8   zFastVit.__init__N  s1   4 	 )1{"& 'qM	
	 a=#(>>!^S[#Q#W#WX^#_`aqxxz``s6{# 	eA$QD8z!}+DJ  "1 Qi &	
 -Q/ !0 ( 'qk ".a 1 %Q- $ &  .  #1v  (>!" (#$  .%E( MM% !!}H
$x1u9W^_`^aUb"c!dd5	e6 mmV,dkk*4<<D1 >>  ,D"+D,<,<"= 	3wA:"**..t"D KKME&z%'89E#G9-

E2	3 JMZXZ^^gMgIhhDh 5,!"~&-#"#
DO '%#	DI 	

4%%&M as   >Jr   c                    t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       yyyy)zInit. for classificationr  r  Nr   )rt   r9   r   r   rR   r3   r  r  r  s     r   r   zFastVit._init_weights  sZ    a#!((-!RYY'AFF,>!!!&&!, -?' $r    c                     t               S r   )setrD   s    r   no_weight_decayzFastVit.no_weight_decay  s	    ur    c                 2    t        d|rd      S g d      S )Nz^stemz^stages\.(\d+)))z^stages\.(\d+).downsampler   )z^stages\.(\d+).pos_embr  )z^stages\.(\d+)\.\w+\.(\d+)N)rd  r<  )ri  )rD   coarses     r   group_matcherzFastVit.group_matcher  s'    (.$
 	
5
 	
r    c                 4    | j                   D ]	  }||_         y r   )rj  r7  )rD   enabless      r   set_grad_checkpointingzFastVit.set_grad_checkpointing  s     	*A#)A 	*r    c                 .    | j                   j                  S r   )rt  fcr  s    r   get_classifierzFastVit.get_classifier  s    yy||r    c                 J    || _         | j                  j                  ||       y r   )rV  rt  reset)rD   rV  rZ  s      r   reset_classifierzFastVit.reset_classifier  s    &		[1r    rG   indicesr   
stop_early
output_fmtintermediates_onlyc                    |dv sJ d       g }t        t        | j                        |      \  }}	| j                  |      }| j                  dz
  }
t
        j                  j                         s|s| j                  }n| j                  d|	dz    }d}t        |      D ]#  \  }} ||      }||v s|j                  |       % |r|S ||
k(  r| j                  |      }||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )NCHWzOutput shape must be NCHW.r   Nr   )r   r  rj  rd  rk  r_   r   r?  rn  r9  rs  )rD   rG   r  r   r  r  r  intermediatestake_indices	max_indexlast_idxrj  feat_idxrw  s                 r   forward_intermediateszFastVit.forward_intermediates  s    * Y&D(DD&"6s4;;7G"Qi IIaL??Q&99!!#:[[F[[)a-0F(0 	(OHeaA<'$$Q'	(
   x"A-r    
prune_norm
prune_headc                     t        t        | j                        |      \  }}| j                  d|dz    | _        |r| j                  dd       |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r  rj  r  )rD   r  r  r  r  r  s         r   prune_intermediate_layersz!FastVit.prune_intermediate_layers  sM     #7s4;;7G"Qikk.9q=1!!!R(r    c                 <   | j                  |      }g }t        | j                        D ]Q  \  }} ||      }| j                  s|| j                  v s*t        | d|       } ||      }|j                  |       S | j                  r|S | j                  |      }|S )Nr   )rd  rn  rj  rB  rm  getattrr9  rs  )rD   rG   outsidxblockr"  x_outs          r   forward_featureszFastVit.forward_features   s    IIaL#DKK0 	'JCaA~~$***!(cU|!<J&qMEKK&	' >>KOOAr    
pre_logitsc                 N    |r| j                  |d      S | j                  |      S )NT)r  )rt  )rD   rG   r  s      r   forward_headzFastVit.forward_head1  s$    0:tyyty,L		!Lr    c                 f    | j                  |      }| j                  r|S | j                  |      }|S r   )r  rB  r  r   s     r   rM   zFastVit.forward4  s3    !!!$>>Ha r    F)Tr   )NFFr  F)r   FT)#r   r   r   r_   r   r   r   r   r9   r=   r   r   r   r@  r   r   r   r   r8   r   ignorer  r  r  r  r  r   r   r   r  r  r  r  rM   r   r   s   @r   r   r   G  s   yyt$$ &2,\*=,4,E/K()#8C#$ "$'$',0 %#"$*,..)+#(1q'q' #s(Oq'  S/	q'
 c3hq' eSj)q' tSy)q' "$),q' #&q' q' HRYY/45q' !q' q' q' "q'  "!q'" %*#q'$ %q'& 'q'( )q'* +q', RYY-q'. BII/q'0 !1q'2 
3q'f-ryy -T - YY  YY
 
 YY* * YY		  2C 2hsm 2 8<$$',, ||,  eCcN34,  	, 
 ,  ,  !%,  
tELL!5tELL7I)I#JJ	K, ` ./$#	3S	>*  	%,, 5<< "Mell M M %,, r    c                 0    | dddddt         t        ddd
|S )	NrL  )r   rI  rI  )   r  g?bicubic)zstem.0.conv_kxk.0.convzstem.0.conv_scale.convzhead.fc)
urlrV  
input_size	pool_sizecrop_pctinterpolationmeanr   
first_conv
classifierr	   )r  kwargss     r   _cfgr  <  s4    #"%#J  r    zfastvit_t8.apple_in1kztimm/)	hf_hub_idzfastvit_t12.apple_in1kzfastvit_s12.apple_in1kzfastvit_sa12.apple_in1kzfastvit_sa24.apple_in1kzfastvit_sa36.apple_in1kzfastvit_ma36.apple_in1kgffffff?)r  r  zfastvit_t8.apple_dist_in1kzfastvit_t12.apple_dist_in1kzfastvit_s12.apple_dist_in1kzfastvit_sa12.apple_dist_in1kzfastvit_sa24.apple_dist_in1kzfastvit_sa36.apple_dist_in1kzfastvit_ma36.apple_dist_in1kzfastvit_mci0.apple_mclipzapple/mobileclip_s0_timmzXhttps://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s0.ptrJ  )r   r   r   )      ?r  r  )r  r  r  rV  r  r   zfastvit_mci1.apple_mclipzapple/mobileclip_s1_timmzXhttps://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s1.ptzfastvit_mci2.apple_mclipzapple/mobileclip_s2_timmzXhttps://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/mobileclip_s2.ptc                 6   d| v r| S | j                  d|       } d| v rd}nd}ddl}ddl}g }| j                         D ]C  \  }}|j	                  d|      }|s|j                  t        |j                  d	                   E t        t        t        |                  }i }	| j                         D ]d  \  }}|r||vr|j                  |d      }|j                  d
d      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd|      }|j                  d      r|j                  dd      }|j                  d d!      }|j                  d"      r|d#k(  rt        |j                   d$      rrt#        |j                   j$                  t&        j(                        rD|j                  d#d%      }|j*                  }t-        j.                  |j0                  d         |	d&<   n|j                  d"d'      }|j	                  d(|      }d)\  }
}|r,t        |j                  d*            }|j3                  ||      }
|
_d+| }d,|
 }|d-z   |v r|j                  |d-z   |d.z         }n5|d/z   |v r|j                  |d/z   |d0z         }n|j                  ||d1z         }||	|<   g |	S )2z$ Remap original checkpoints -> timm zstem.0.conv_kxk.0.conv.weight
state_dictz8image_encoder.model.patch_embed.0.rbr_conv.0.conv.weightzimage_encoder.model.r  r   Nz^(.*?)network\.(\d+)\.proj.*r]   patch_embedrd  rbr_convrA   	rbr_scalerB   rbr_skipr>   conv_exprs  
lkb_originr   convffnr  z	se.reducezse.fc1z	se.expandzse.fc2zlayer_scale_([0-9])zlayer_scale_\1.gammar   zlayer_scale.gamma	dist_head	head_distzhead.z	head.projr  zhead.fc.weightzhead.fc.biaszhead.fc.z^network\.(\d+))NNr   znetwork.r\  z.projz.downsample.projz.pez.pos_emb.pos_encz.blocks)rq  rebisectitemsmatchr9  r   grouplistsortedr  replacesubendswith
startswithrW   rt  rt   r  r9   r   Tr_   rz   r   bisect_right)r  modelprefixr  r  
stage_endsr   r   r  out_dict	stage_idxnet_idx
net_prefixstage_prefixs                 r   checkpoint_filter_fnr    s   &*4j9JAZO' J  " 318!<c%++a.123 fS_-.JH  " -1Q		&"%A IImV,IIj*-IIk<0IIj*-IIj,/IIlL1IIi'IIk8,IIk8,FF)+BAF::m$		-)<=AIIk;/<< KGEJJ$=*UZZ]]\^\e\eBfIIk+;<CC+0;;qwwqz+B(IIgz2 +Q/'	7%++a.)G++J@I #G9-J$YK0LG#q(IIj72LCU4UVe#q(IIj50,AS2STIIj,*BC[-\ Or    c                 r    |j                  dd      }t        t        | |ft        t	        d|      d|}|S )Nrm  r`  T)flatten_sequentialrm  )pretrained_filter_fnfeature_cfg)popr   r   r  ri  )variant
pretrainedr  rm  r  s        r   _create_fastvitr    sJ    **]L9K  2DkJ E Lr    c           	      N    t        dddd      }t        dd| it        |fi |S )z%Instantiate FastViT-T8 model variant.)r]   r]   r   r]   )0   `      i  r   r   r   r   rE  rO  rQ  rR  rP  r  )
fastvit_t8ri  r  r  r  
model_argss      r   r  r    s:     %E	J ]J]$zB\U[B\]]r    c           	      N    t        dddd      }t        dd| it        |fi |S )z&Instantiate FastViT-T12 model variant.rC  rF  r  rE  r  r  )fastvit_t12r  r  s      r   r  r    :     &E	J ^Z^4
C]V\C]^^r    c           	      N    t        dddd      }t        dd| it        |fi |S )z&Instantiate FastViT-S12 model variant.rC  rF  rK  rE  r  r  )fastvit_s12r  r  s      r   r  r    r  r    c                 v    t        ddddddt        t        d      fd      }t        d
d	| it        |fi |S )z'Instantiate FastViT-SA12 model variant.rC  rF  rK  Nr  r  r5  r5  r5  r6  rO  rQ  rR  rW  rP  r  )fastvit_sa12ri  r   r	  r  r  s      r   r  r    sO     &dG,@PV$WXFJ _j_DD^W]D^__r    c                 v    t        ddddddt        t        d      fd      }t        d
d	| it        |fi |S )z'Instantiate FastViT-SA24 model variant.)r   r      r   rF  rK  Nr  r  r  r  r  )fastvit_sa24r  r  s      r   r  r    O     &dG,@PV$WXFJ _j_DD^W]D^__r    c                 v    t        ddddddt        t        d      fd      }t        d
d	| it        |fi |S )z'Instantiate FastViT-SA36 model variant.rD  rD     rD  rF  rK  Nr  r  r  r  r  )fastvit_sa36r  r  s      r   r  r    r  r    c                 v    t        ddddddt        t        d      fd      }t        d
d	| it        |fi |S )z'Instantiate FastViT-MA36 model variant.r   )L      i0  i`  rK  Nr  r  r  r  r  )fastvit_ma36r  r  s      r   r  r  +  r  r    c                 z    t        dddddddt        t        d      fdd	
      }t        dd| it        |fi |S )zInstantiate MCi0 model variant.)r]   rD  
   r]   rF  r  FFTTNr  r  r  TrO  rQ  rR  rT  rW  rP  r   r  )fastvit_mci0r  r  s      r   r  r  8  sU     &1dG,@PV$WXFJ _j_DD^W]D^__r    c                 z    t        dddddddt        t        d      fdd	
      }t        dd| it        |fi |S )zInstantiate MCi1 model variant.)r   r     r   rF  r  r	  Nr  r  r  Tr
  r  )fastvit_mci1r  r  s      r   r  r  G  U     &1dG,@PV$WXFJ _j_DD^W]D^__r    c                 z    t        dddddddt        t        d      fdd	
      }t        dd| it        |fi |S )zInstantiate MCi2 model variant.)r   r     r   )P      i@  i  r  r	  Nr  r  r  Tr
  r  )fastvit_mci2r  r  s      r   r  r  V  r  r    )r  r  )Bro  	functoolsr   typingr   r   r   r   r   r_   torch.nnr9   	timm.datar
   r   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   __all__r   r   r"   r   r   r   r   r   r   r   r   r   r   r   r	  r  r!  r*  r   r  default_cfgsr  r  r  r  r  r  r  r  r  r  r  r  r   r    r   <module>r      s   
  5 5   A   * + ' <+&n=RYY n=bT=RYY T=t &(WW$	*** 		?* 	*
 ]]*ZA		 AH1 1hF299 Fm(ryy m(`1bii 1hq$299 q$h7BII 7t6RYY 6r`299 `Frbii rj  % ;&T;& d;& d;& t ;& t ;& t ;& t ;&& !$#';&* "4$+;&0 "4$1;&4 #D%5;&8 #D%9;&< #D%=;&B #D%C;&L ,f|!M;&Z ,f|![;&h ,f|!i;& ;|FR
 ^ ^ _ _ _ _ 	` 	` 	` 	` 	` 	` 	` 	` ` ` ` ` ` `r    