
    kh
                        d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
 ddlZddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZmZm Z  ddl!m"Z"m#Z#m$Z$ ddgZ% G d dejL                        Z' G d dejL                        Z( G d dejL                        Z) G d dejL                        Z* G d dejL                        Z+ G d dejL                        Z,didejL                  de-de.de/ddf
dZ0d Z1djde,fdZ2dkdee-ef   fd Z3 e"i d! e3       d" e3       d# e3       d$ e3d%d&'      d( e3d%d)d*+      d, e3       d- e3d%d.'      d/ e3d%d0d*+      d1 e3d%d2d3d4d5d6d78      d9 e3d%d:d3d4d5d6;      d< e3ee=      d> e3d%d?ee@      dA e3d%dBee@      dC e3d%dDee@      dE e3d%dFee@      dG e3d%dHee@      dI e3d%dJee@       e3d%dKee@       e3d%dLee@       e3d%dMee@       e3d%dNee@       e3d%dOee@       e3d%dPee@       e3        e3d%dQ'       e3       dR	      Z4e#djde,fdS       Z5e#djde,fdT       Z6e#djde,fdU       Z7e#djde,fdV       Z8e#djde,fdW       Z9e#djde,fdX       Z:e#djde,fdY       Z;e#djde,fdZ       Z<e#djde,fd[       Z=e#djde,fd\       Z>e#djde,fd]       Z?e#djde,fd^       Z@e#djde,fd_       ZAe#djde,fd`       ZBe#djde,fda       ZC e$eDd$d-d9d1dIdbdcdddedfdgdh       y)luj   MLP-Mixer, ResMLP, and gMLP in PyTorch

This impl originally based on MLP-Mixer paper.

Official JAX impl: https://github.com/google-research/vision_transformer/blob/linen/vit_jax/models_mixer.py

Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601

@article{tolstikhin2021,
  title={MLP-Mixer: An all-MLP Architecture for Vision},
  author={Tolstikhin, Ilya and Houlsby, Neil and Kolesnikov, Alexander and Beyer, Lucas and Zhai, Xiaohua and Unterthiner,
        Thomas and Yung, Jessica and Keysers, Daniel and Uszkoreit, Jakob and Lucic, Mario and Dosovitskiy, Alexey},
  journal={arXiv preprint arXiv:2105.01601},
  year={2021}
}

Also supporting ResMlp, and a preliminary (not verified) implementations of gMLP

Code: https://github.com/facebookresearch/deit
Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
@misc{touvron2021resmlp,
      title={ResMLP: Feedforward networks for image classification with data-efficient training},
      author={Hugo Touvron and Piotr Bojanowski and Mathilde Caron and Matthieu Cord and Alaaeldin El-Nouby and
        Edouard Grave and Armand Joulin and Gabriel Synnaeve and Jakob Verbeek and Hervé Jégou},
      year={2021},
      eprint={2105.03404},
}

Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
@misc{liu2021pay,
      title={Pay Attention to MLPs},
      author={Hanxiao Liu and Zihang Dai and David R. So and Quoc V. Le},
      year={2021},
      eprint={2105.08050},
}

A thank you to paper authors for releasing code and weights.

Hacked together by / Copyright 2021 Ross Wightman
    N)partial)AnyDictListOptionalUnionTuple)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
PatchEmbedMlpGluMlpGatedMlpDropPathlecun_normal_	to_2tuple   )build_model_with_cfg)feature_take_indices)named_apply
checkpointcheckpoint_seq)generate_default_cfgsregister_modelregister_model_deprecations
MixerBlockMlpMixerc                        e Zd ZdZde eej                  d      ej                  ddfde	de	de
eeeef   f   d	ed
ededededdf fdZdej                   dej                   fdZ xZS )r   zResidual Block w/ token mixing and channel MLPs.

    Based on: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
          ?      @ư>eps        dimseq_len	mlp_ratio	mlp_layer
norm_layer	act_layerdrop	drop_pathreturnNc	                 F   t         |           t        |      D 	cg c]  }	t        |	|z         c}	\  }
} ||      | _         |||
||      | _        |dkD  rt        |      nt        j                         | _	         ||      | _
         |||||      | _        yc c}	w )a  Initialize MixerBlock.

        Args:
            dim: Dimension of input features.
            seq_len: Sequence length.
            mlp_ratio: Expansion ratios for token mixing and channel MLPs.
            mlp_layer: MLP layer class.
            norm_layer: Normalization layer.
            act_layer: Activation layer.
            drop: Dropout rate.
            drop_path: Drop path rate.
        r+   r,   r%   N)super__init__r   intnorm1
mlp_tokensr   nnIdentityr-   norm2mlp_channels)selfr&   r'   r(   r)   r*   r+   r,   r-   x
tokens_dimchannels_dim	__class__s               Q/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/mlp_mixer.pyr2   zMixerBlock.__init__?   s    . 	:CI:N#OQCCL#O 
L_
#GZ9SWX09B),BKKM_
%c<9SWX $Ps   Br;   c           	         || j                  | j                  | j                  |      j                  dd            j                  dd            z   }|| j                  | j	                  | j                  |                  z   }|S Forward pass.r      )r-   r5   r4   	transposer9   r8   r:   r;   s     r?   forwardzMixerBlock.forward^   sm    ttzz!}/F/Fq!/LMWWXY[\]^^t00A?@@    )__name__
__module____qualname____doc__r   r   r6   	LayerNormGELUr3   r   floatr	   typer2   torchTensorrF   __classcell__r>   s   @r?   r   r   :   s     <F!&r||> gg!YY Y UE%,$778	Y
 Y Y Y Y Y 
Y> %,, rG   c                   d     e Zd ZdZdeddf fdZdej                  dej                  fdZ xZ	S )AffinezAffine transformation layer.r&   r.   Nc                     t         |           t        j                  t	        j
                  dd|f            | _        t        j                  t	        j                  dd|f            | _        y)zXInitialize Affine layer.

        Args:
            dim: Dimension of features.
        r   N)	r1   r2   r6   	ParameterrP   onesalphazerosbeta)r:   r&   r>   s     r?   r2   zAffine.__init__h   sN     	\\%**aC["9:
LLaC[!9:	rG   r;   c                 X    t        j                  | j                  | j                  |      S )zApply affine transformation.)rP   addcmulr[   rY   rE   s     r?   rF   zAffine.forwardr   s    }}TYY

A66rG   )
rH   rI   rJ   rK   r3   r2   rP   rQ   rF   rR   rS   s   @r?   rU   rU   e   s4    &;C ;D ;7 7%,, 7rG   rU   c                        e Zd ZdZdeeej                  dddfdedede	de
d	e
d
e
de	de	de	ddf fdZdej                  dej                  fdZ xZS )ResBlockzResidual MLP block w/ LayerScale and Affine 'norm'.

    Based on: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
       g-C6?r%   r&   r'   r(   r)   r*   r+   init_valuesr,   r-   r.   Nc
                    t         |           t        ||z        }
 ||      | _        t	        j
                  ||      | _        |	dkD  rt        |	      nt	        j                         | _	         ||      | _
         |||
||      | _        t	        j                  |t        j                  |      z        | _        t	        j                  |t        j                  |      z        | _        y)a  Initialize ResBlock.

        Args:
            dim: Dimension of input features.
            seq_len: Sequence length.
            mlp_ratio: Channel MLP expansion ratio.
            mlp_layer: MLP layer class.
            norm_layer: Normalization layer.
            act_layer: Activation layer.
            init_values: Initial values for layer scale.
            drop: Dropout rate.
            drop_path: Drop path rate.
        r%   r0   N)r1   r2   r3   r4   r6   Linearlinear_tokensr   r7   r-   r8   r9   rW   rP   rX   ls1ls2)r:   r&   r'   r(   r)   r*   r+   ra   r,   r-   channel_dimr>   s              r?   r2   zResBlock.__init__|   s    2 	#	/*_
YYw809B),BKKM_
%c;)RVW<<ejjo =><<ejjo =>rG   r;   c           
      B   || j                  | j                  | j                  | j                  |      j	                  dd            j	                  dd      z        z   }|| j                  | j
                  | j                  | j                  |            z        z   }|S rA   )r-   re   rd   r4   rD   rf   r9   r8   rE   s     r?   rF   zResBlock.forward   s    txx$*<*<TZZ]=T=TUVXY=Z*[*e*efgij*kklltxx$*;*;DJJqM*JJKKrG   )rH   rI   rJ   rK   r   rU   r6   rM   r3   rN   rO   r2   rP   rQ   rF   rR   rS   s   @r?   r_   r_   w   s      !!% gg!%!!?!? !? 	!?
 !? !? !? !? !? !? 
!?F %,, rG   r_   c            	            e Zd ZdZej
                  fdedededdf fdZddZ	d	e
j                  de
j                  fd
Z xZS )SpatialGatingUnitzcSpatial Gating Unit.

    Based on: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
    r&   r'   r*   r.   Nc                 |    t         |           |dz  } ||      | _        t        j                  ||      | _        y)zInitialize Spatial Gating Unit.

        Args:
            dim: Dimension of input features.
            seq_len: Sequence length.
            norm_layer: Normalization layer.
        rC   N)r1   r2   normr6   rc   proj)r:   r&   r'   r*   gate_dimr>   s        r?   r2   zSpatialGatingUnit.__init__   s7     	!8x(	IIgw/	rG   c                     t         j                  j                  | j                  j                  d       t         j                  j                  | j                  j                         y)z'Initialize weights for projection gate.r"   stdN)r6   initnormal_rm   weightones_biasr:   s    r?   init_weightszSpatialGatingUnit.init_weights   s:     			((d3
diinn%rG   r;   c                     |j                  dd      \  }}| j                  |      }| j                  |j                  dd            }||j                  dd      z  S )zApply spatial gating.rC   r&   )chunkrl   rm   rD   )r:   r;   uvs       r?   rF   zSpatialGatingUnit.forward   sT    wwqbw!1IIaLIIakk"b)*1;;r2&&&rG   )r.   N)rH   rI   rJ   rK   r6   rL   r3   rO   r2   rx   rP   rQ   rF   rR   rS   s   @r?   rj   rj      sU     CE,, 0C 0# 04 0SW 0&' '%,, 'rG   rj   c                        e Zd ZdZde eej                  d      ej                  ddfde	de	de
d	ed
edede
de
ddf fdZdej                  dej                  fdZ xZS )SpatialGatingBlockzpResidual Block w/ Spatial Gating.

    Based on: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
    r`   r"   r#   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   Nc	                     t         |           t        ||z        }	 ||      | _        t	        t
        |      }
 |||	||
|      | _        |dkD  rt        |      | _
        yt        j                         | _
        y)a  Initialize SpatialGatingBlock.

        Args:
            dim: Dimension of input features.
            seq_len: Sequence length.
            mlp_ratio: Channel MLP expansion ratio.
            mlp_layer: MLP layer class.
            norm_layer: Normalization layer.
            act_layer: Activation layer.
            drop: Dropout rate.
            drop_path: Drop path rate.
        )r'   )r+   
gate_layerr,   r%   N)r1   r2   r3   rl   r   rj   r9   r   r6   r7   r-   )r:   r&   r'   r(   r)   r*   r+   r,   r-   rg   sgur>   s              r?   r2   zSpatialGatingBlock.__init__   sk    . 	#	/*sO	'9%c;)X[bfg09B),BKKMrG   r;   c                 j    || j                  | j                  | j                  |                  z   }|S rB   )r-   r9   rl   rE   s     r?   rF   zSpatialGatingBlock.forward   s-    t001>??rG   )rH   rI   rJ   rK   r   r   r6   rL   rM   r3   rN   rO   r2   rP   rQ   rF   rR   rS   s   @r?   r   r      s      !&&r||> gg!RR R 	R
 R R R R R 
R< %,, rG   r   c            %           e Zd ZdZdddddddee eej                  d	
      ej                  ddddddfde
de
de
de
de
de
deeeeef   f   dededededededededededd f$ fd!Zej&                  j(                  d7dedd fd"       Zej&                  j(                  d7d#edeeef   fd$       Zej&                  j(                  d8d%edd fd&       Zej&                  j(                  dej4                  fd'       Zd9de
dee   dd fd(Z	 	 	 	 	 d:d)ej<                  d*eee
ee
   f      d+ed,ed-ed.edeeej<                     eej<                  eej<                     f   f   fd/Z 	 	 	 d;d*ee
ee
   f   d0ed1edee
   fd2Z!d)ej<                  dej<                  fd3Z"d7d)ej<                  d4edej<                  fd5Z#d)ej<                  dej<                  fd6Z$ xZ%S )<r   zMLP-Mixer model architecture.

    Based on: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
                     r   r"   r#   r%   Favgnum_classesimg_sizein_chans
patch_size
num_blocks	embed_dimr(   block_layerr)   r*   r+   	drop_rateproj_drop_ratedrop_path_ratenlhb	stem_normglobal_poolr.   Nc                    t         |           || _        || _        |x| _        x| _        | _        d| _        t        |||||r|
nd      | _	        t        | j                  d      r| j                  j                         n|}t        j                  t        |      D cg c]&  } ||| j                  j                  ||	|
|||      ( c} | _        t        |      D cg c]  }t#        d| ||       c}| _         |
|      | _        t        j(                  |      | _        |dkD  r t        j,                  || j                        nt        j.                         | _        | j3                  |	       yc c}w c c}w )
aB  Initialize MLP-Mixer.

        Args:
            num_classes: Number of classes for classification.
            img_size: Input image size.
            in_chans: Number of input channels.
            patch_size: Patch size.
            num_blocks: Number of mixer blocks.
            embed_dim: Embedding dimension.
            mlp_ratio: MLP expansion ratio(s).
            block_layer: Block layer class.
            mlp_layer: MLP layer class.
            norm_layer: Normalization layer.
            act_layer: Activation layer.
            drop_rate: Head dropout rate.
            proj_drop_rate: Projection dropout rate.
            drop_path_rate: Drop path rate.
            nlhb: Use negative log bias initialization.
            stem_norm: Apply normalization to stem.
            global_pool: Global pooling type.
        FN)r   r   r   r   r*   
feat_ratio)r)   r*   r+   r,   r-   zblocks.)modulenum_chs	reductionr   )r   )r1   r2   r   r   num_featureshead_hidden_sizer   grad_checkpointingr   stemhasattrr   r6   
Sequentialrangenum_patchesblocksdictfeature_inforl   Dropout	head_droprc   r7   headrx   )r:   r   r   r   r   r   r   r(   r   r)   r*   r+   r   r   r   r   r   r   r   _ir>   s                        r?   r2   zMlpMixer.__init__   sn   R 	&&ENNND1DN"'!%.zD
	 /6dii.NDII((*T^	mm :&&(  		%%#%##(	&( ) Y^^hXikSTD'!yINky)	I.>IAoBIIi)9)9:SUS^S^S`	t$%&(ks   !+E:"E?c                     |r t        j                  | j                         nd}t        t	        t
        |      |        y)zrInitialize model weights.

        Args:
            nlhb: Use negative log bias initialization for head.
        r%   )	head_bias)r   N)mathlogr   r   r   _init_weights)r:   r   r   s      r?   rx   zMlpMixer.init_weightsA  s0     48TXXd..//R	GMY?MrG   coarsec                      t        dddg      S )zCreate regex patterns for parameter grouping.

        Args:
            coarse: Use coarse grouping.

        Returns:
            Dictionary mapping group names to regex patterns.
        z^stem)z^blocks\.(\d+)N)z^norm)i )r   r   )r   )r:   r   s     r?   group_matcherzMlpMixer.group_matcherK  s     -/CD
 	
rG   enablec                     || _         y)zEnable or disable gradient checkpointing.

        Args:
            enable: Whether to enable gradient checkpointing.
        N)r   )r:   r   s     r?   set_grad_checkpointingzMlpMixer.set_grad_checkpointingZ  s     #)rG   c                     | j                   S )zGet the classifier module.)r   rw   s    r?   get_classifierzMlpMixer.get_classifierc  s     yyrG   c                     || _         ||dv sJ || _        |dkD  r&t        j                  | j                  |      | _        yt        j
                         | _        y)zReset the classifier head.

        Args:
            num_classes: Number of classes for new classifier.
            global_pool: Global pooling type.
        N) r   r   )r   r   r6   rc   r   r7   r   )r:   r   r   s      r?   reset_classifierzMlpMixer.reset_classifierh  sT     '"+---*D>IAoBIIdnnk:	SUS^S^S`	rG   r;   indicesrl   
stop_early
output_fmtintermediates_onlyc           	      $   |dv sJ d       |dk(  }g }t        t        | j                        |      \  }	}
|j                  \  }}}}| j	                  |      }t
        j                  j                         s|s| j                  }n| j                  d|
dz    }t        |      D ]m  \  }}| j                  r+t
        j                  j                         st        ||      }n ||      }||	v sJ|j                  |r| j                  |      n|       o |ra| j                  j                  ||f      \  }}|D cg c]6  }|j                  |||d      j                  dddd	      j!                         8 }}|r|S | j                  |      }||fS c c}w )
aV  Forward features that returns intermediates.

        Args:
            x: Input image tensor.
            indices: Take last n blocks if int, all if None, select matching indices if sequence.
            norm: Apply norm layer to all intermediates.
            stop_early: Stop iterating over blocks when last desired intermediate hit.
            output_fmt: Shape of intermediate feature outputs ('NCHW' or 'NLC').
            intermediates_only: Only return intermediate features.

        Returns:
            List of intermediate features or tuple of (final features, intermediates).
        )NCHWNLCz)Output format must be one of NCHW or NLC.r   Nr   rz   r   r   rC   )r   lenr   shaper   rP   jitis_scripting	enumerater   r   appendrl   dynamic_feat_sizereshapepermute
contiguous)r:   r;   r   rl   r   r   r   r   intermediatestake_indices	max_indexBr   heightwidthr   r   blkHWys                        r?   forward_intermediateszMlpMixer.forward_intermediatesu  s   , _,Y.YY,&"6s4;;7G"Qi  gg1feIIaL99!!#:[[F[[)a-0F' 	BFAs&&uyy/E/E/GsA&FL $$TTYYq\qA	B 99..?DAq^klYZQYYq!Q3;;Aq!QGRRTlMl  IIaL- ms   7;F
prune_norm
prune_headc                     t        t        | j                        |      \  }}| j                  d|dz    | _        |rt        j                         | _        |r| j                  dd       |S )aE  Prune layers not required for specified intermediates.

        Args:
            indices: Indices of intermediate layers to keep.
            prune_norm: Whether to prune normalization layer.
            prune_head: Whether to prune the classifier head.

        Returns:
            List of indices that were kept.
        Nr   r   r   )r   r   r   r6   r7   rl   r   )r:   r   r   r   r   r   s         r?   prune_intermediate_layersz"MlpMixer.prune_intermediate_layers  s]      #7s4;;7G"Qikk.9q=1DI!!!R(rG   c                     | j                  |      }| j                  r5t        j                  j	                         st        | j                  |      }n| j                  |      }| j                  |      }|S )z/Forward pass through feature extraction layers.)r   r   rP   r   r   r   r   rl   rE   s     r?   forward_featureszMlpMixer.forward_features  sV    IIaL""599+A+A+Ct{{A.AAAIIaLrG   
pre_logitsc                     | j                   dk(  r|j                  d      }| j                  |      }|r|S | j                  |      S )zForward pass through classifier head.

        Args:
            x: Feature tensor.
            pre_logits: Return features before final classifier.

        Returns:
            Output tensor.
        r   r   r{   )r   meanr   r   )r:   r;   r   s      r?   forward_headzMlpMixer.forward_head  sD     u$1ANN1q0DIIaL0rG   c                 J    | j                  |      }| j                  |      }|S r   )r   r   rE   s     r?   rF   zMlpMixer.forward  s'    !!!$a rG   F)T)N)NFFr   F)r   FT)&rH   rI   rJ   rK   r   r   r   r6   rL   rM   r3   r   rN   r	   rO   boolstrr2   rP   r   ignorerx   r   r   r   r   Moduler   r   r   rQ   r   r   r   r   r   rF   rR   rS   s   @r?   r   r      s@     $  ;E *!&r||> gg!$&$&#$%J%J% J% 	J%
 J% J% J% UE%,$778J% J% J% J% J% J% "J% "J%  !J%" #J%$ %J%& 
'J%X YYN N$ N N YY
D 
T#s(^ 
 
 YY)T )T ) ) YY		  aC ahsm aW[ a  8<$$',7 ||7  eCcN347  	7 
 7  7  !%7  
tELL!5tELL7I)I#JJ	K7 v ./$#	3S	>*  	
 
c0%,, 5<< 1ell 1 1 1 %,, rG   r   namer   flaxr.   c                    t        | t        j                        rC|j                  d      rTt        j                  j                  | j                         t        j                  j                  | j                  |       y|rKt        | j                         | j                  t        j                  j                  | j                         yt        j                  j                  | j                         | j                  Zd|v r,t        j                  j                  | j                  d       yt        j                  j                  | j                         yyyt        | t        j                        rLt        | j                         | j                  *t        j                  j                  | j                         yyt        | t        j                  t        j                  t        j                  f      rSt        j                  j!                  | j                         t        j                  j                  | j                         yt#        | d      r| j%                          yy)zMixer weight initialization (trying to match Flax defaults).

    Args:
        module: Module to initialize.
        name: Module name.
        head_bias: Bias value for head layer.
        flax: Use Flax-style initialization.
    r   Nmlpr"   rp   rx   )
isinstancer6   rc   
startswithrr   zeros_rt   	constant_rv   r   xavier_uniform_rs   Conv2drL   BatchNorm2d	GroupNormru   r   rx   )r   r   r   r   s       r?   r   r     sw    &"))$??6"GGNN6==)GGfkk95fmm,;;*GGNN6;;/ ''6;;*}>v{{3	 + + 
FBII	&fmm$;;"GGNN6;;' #	FR\\2>>2<<H	I
fmm$
v{{#		( 	 
)rG   c                 J   d| v ri }| j                         D ]  \  }}|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd	      }|j                  d
      s|j                  d      r|j                  ddd      }|||<    |S | S )z Remap checkpoints if needed zpatch_embed.proj.weightzpatch_embed.zstem.zattn.zlinear_tokens.zmlp.zmlp_channels.gamma_lsz.alphaz.betar   rz   )itemsreplaceendswithr   )
state_dictmodelout_dictkr   s        r?   checkpoint_filter_fnr    s     J.$$& 	DAq		.'2A		'#34A		&/2A		(D)Azz(#qzz'':IIaB'HQK	 rG   c                 r    |j                  dd      }t        t        | |ft        t	        |d      d|}|S )Nout_indicesr   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)popr   r   r  r   )variant
pretrainedkwargsr  r   s        r?   _create_mixerr    sJ    **]A.K  2[hG E LrG   c                 "    | ddd dddddddd	|S )
Nr   )r   r   r         ?bicubicT)r    r    r    z	stem.projr   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer   rq   
first_conv
classifier )r  r  s     r?   _cfgr  ,  s1    =tI4!  rG   zmixer_s32_224.untrainedzmixer_s16_224.untrainedzmixer_b32_224.untrainedz mixer_b16_224.goog_in21k_ft_in1kztimm/zlhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_b16_224-76587d61.pth)	hf_hub_idr  zmixer_b16_224.goog_in21kzrhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_b16_224_in21k-617b3de2.pthiSU  )r  r  r   zmixer_l32_224.untrainedz mixer_l16_224.goog_in21k_ft_in1kzlhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_l16_224-92f9adc4.pthzmixer_l16_224.goog_in21kzrhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_mixer_l16_224_in21k-846aa33c.pthzmixer_b16_224.miil_in21kzvhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/mixer_b16_224_miil_in21k-2a558a71.pth)r%   r%   r%   )      ?r  r  r  bilineari+  )r  r  r   rq   r  r  r   z mixer_b16_224.miil_in21k_ft_in1kzphttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tresnet/mixer_b16_224_miil-9229a591.pth)r  r  r   rq   r  r  zgmixer_12_224.untrained)r   rq   zgmixer_24_224.ra3_in1kzohttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gmixer_24_224_raa-7daf7ae6.pth)r  r  r   rq   zresmlp_12_224.fb_in1kz9https://dl.fbaipublicfiles.com/deit/resmlp_12_no_dist.pthzresmlp_24_224.fb_in1kz9https://dl.fbaipublicfiles.com/deit/resmlp_24_no_dist.pthzresmlp_36_224.fb_in1kz9https://dl.fbaipublicfiles.com/deit/resmlp_36_no_dist.pthzresmlp_big_24_224.fb_in1kz:https://dl.fbaipublicfiles.com/deit/resmlpB_24_no_dist.pthzresmlp_12_224.fb_distilled_in1kz6https://dl.fbaipublicfiles.com/deit/resmlp_12_dist.pthz6https://dl.fbaipublicfiles.com/deit/resmlp_24_dist.pthz6https://dl.fbaipublicfiles.com/deit/resmlp_36_dist.pthz7https://dl.fbaipublicfiles.com/deit/resmlpB_24_dist.pthz6https://dl.fbaipublicfiles.com/deit/resmlpB_24_22k.pthz6https://dl.fbaipublicfiles.com/deit/resmlp_12_dino.pthz6https://dl.fbaipublicfiles.com/deit/resmlp_24_dino.pthznhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/gmlp_s16_224_raa-10536d42.pth)	resmlp_24_224.fb_distilled_in1kresmlp_36_224.fb_distilled_in1k#resmlp_big_24_224.fb_distilled_in1k"resmlp_big_24_224.fb_in22k_ft_in1kzresmlp_12_224.fb_dinozresmlp_24_224.fb_dinozgmlp_ti16_224.untrainedzgmlp_s16_224.ra3_in1kzgmlp_b16_224.untrainedc                 >    t        ddddd|}t        dd| i|}|S )zv Mixer-S/32 224x224
    Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
        r   r   r   r   r   r  r  )mixer_s32_224r   r  r  r  
model_argsr   s       r?   r&  r&    1    
 KSKFKJOjOJOELrG   c                 >    t        ddddd|}t        dd| i|}|S )zw Mixer-S/16 224x224
    Paper:  'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    r   r   r   r%  r  r  )mixer_s16_224r'  r(  s       r?   r,  r,    r*  rG   c                 >    t        ddddd|}t        dd| i|}|S )zw Mixer-B/32 224x224
    Paper:  'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    r$        r%  r  r  )mixer_b32_224r'  r(  s       r?   r0  r0    1    
 LcLVLJOjOJOELrG   c                 >    t        ddddd|}t        dd| i|}|S )z Mixer-B/16 224x224. ImageNet-1k pretrained weights.
    Paper:  'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    r   r.  r/  r%  r  r  )mixer_b16_224r'  r(  s       r?   r3  r3    r1  rG   c                 >    t        ddddd|}t        dd| i|}|S )zx Mixer-L/32 224x224.
    Paper:  'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    r$        r%  r  r  )mixer_l32_224r'  r(  s       r?   r7  r7    1    
 MdMfMJOjOJOELrG   c                 >    t        ddddd|}t        dd| i|}|S )z Mixer-L/16 224x224. ImageNet-1k pretrained weights.
    Paper:  'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
    r   r5  r6  r%  r  r  )mixer_l16_224r'  r(  s       r?   r:  r:    r8  rG   c           
      h    t        dddddt        t        j                  d|}t	        dd| i|}|S )	zV Glu-Mixer-12 224x224
    Experiment by Ross Wightman, adding SwiGLU to MLP-Mixer
    r   r.    r  r!   r   r   r   r(   r)   r+   r  r  )gmixer_12_224r   r   r6   SiLUr  r(  s       r?   r?  r?    F    
  7"zBGG7/57J OjOJOELrG   c           
      h    t        dddddt        t        j                  d|}t	        dd| i|}|S )	zV Glu-Mixer-24 224x224
    Experiment by Ross Wightman, adding SwiGLU to MLP-Mixer
    r   r5  r<  r=  r>  r  r  )gmixer_24_224r@  r(  s       r?   rD  rD    rB  rG   c           
      T    t        dddddt        t        d|}t        dd| i|}|S )	zx ResMLP-12
    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
    r   r.  r<  r`   r   r   r   r(   r   r*   r  r  )resmlp_12_224)r   r_   rU   r  r(  s       r?   rG  rG    sC    
  u"qhciumsuJOjOJOELrG   c                 j    t        d	ddddt        t        d      t        d|}t	        d
d| i|}|S )zx ResMLP-24
    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
    r   r5  r<  r`   gh㈵>ra   rF  r  r  )resmlp_24_224r   r   r_   rU   r  r(  s       r?   rJ  rJ    L    
  V"qH$7FVNTVJ OjOJOELrG   c                 j    t        d	ddddt        t        d      t        d|}t	        d
d| i|}|S )zx ResMLP-36
    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
    r   $   r<  r`   r"   rI  rF  r  r  )resmlp_36_224rK  r(  s       r?   rO  rO    rL  rG   c                 j    t        d	ddddt        t        d      t        d|}t	        d
d| i|}|S )zz ResMLP-B-24
    Paper: `ResMLP: Feedforward networks for image classification...` - https://arxiv.org/abs/2105.03404
    r   r5  r/  r`   r"   rI  rF  r  r  )resmlp_big_24_224rK  r(  s       r?   rQ  rQ    sL    
  VsaH$7FVNTVJ S*S
SELrG   c           
      T    t        dddddt        t        d|}t        dd| i|}|S )	zU gMLP-Tiny
    Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
    r            r   r   r   r(   r   r)   r  r  )gmlp_ti16_224r   r   r   r  r(  s       r?   rW  rW    sC    
  &"qN`&$&J OjOJOELrG   c           
      T    t        dddddt        t        d|}t        dd| i|}|S )	zV gMLP-Small
    Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
    r   rS     rU  rV  r  r  )gmlp_s16_224rX  r(  s       r?   r[  r[  *  C    
  &"qN`&$&J NZN:NELrG   c           
      T    t        dddddt        t        d|}t        dd| i|}|S )	zU gMLP-Base
    Paper: `Pay Attention to MLPs` - https://arxiv.org/abs/2105.08050
    r   rS  r   rU  rV  r  r  )gmlp_b16_224rX  r(  s       r?   r^  r^  6  r\  rG   r  r   r!  r"  rG  rJ  )mixer_b16_224_in21kmixer_l16_224_in21kmixer_b16_224_miilmixer_b16_224_miil_in21kresmlp_12_distilled_224resmlp_24_distilled_224resmlp_36_distilled_224resmlp_big_24_distilled_224resmlp_big_24_224_in22ft1kresmlp_12_224_dinoresmlp_24_224_dino)r%   Fr   )r   )ErK   r   	functoolsr   typingr   r   r   r   r   r	   rP   torch.nnr6   	timm.datar
   r   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   r   r   	_registryr   r   r   __all__r   r   rU   r_   rj   r   r   r   rN   r   r   r  r  r  default_cfgsr&  r,  r0  r3  r7  r:  r?  rD  rG  rJ  rO  rQ  rW  r[  r^  rH   r  rG   r?   <module>ru     sH  'P   : :   A ] ] ] * + @ @ Y Y
$( (V7RYY 7$,ryy ,^'		 '@& &Rtryy tn%")) %3 %5 %T %^b %P"
( 
d38n  % a&tva&tva& tva& 'z)	a&  A!a& tva& 'z)a&$  A!%a&2  E|e:ch!3a&< '~|e:)=a&H t)>DXYIa&J d}"(<>Ka&T TG"(<>Ua&\ TG"(<	>]a&f TG"(<>ga&n  H"(<">oa&x &tD"(<(>ya&@ (,D"(<(> (,D"(<(> ,0E"(<,>
 +/D"(<+>
 "D"(<> "D"(<>
  $v!| #fAa& aH                                  X            H==< :@@@#H"F))' rG   