
    kh|                        d dl mZ d dlmZmZmZmZmZmZ d dl	Z	d dl
mZ d dlmc mZ d dlmZmZ d dlmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZm Z  dd	l!m"Z"m#Z#m$Z$m%Z%m&Z&m'Z' dd
l(m)Z) ddl*m+Z+ ddl,m-Z-m.Z. ddl/m0Z0m1Z1 ddgZ2e+ G d dejf                               Z4 G d dejf                        Z5 G d dejf                        Z6d&de7de8de6fdZ9d&de7de8de6fdZ:	 	 	 	 d'de7de;de8de8de6f
dZ<d(de7fdZ= e0 e=dd        e=dd        e=d !      d"      Z>e1d&de8de6fd#       Z?e1d&de8de5fd$       Z@e1d&de8de5fd%       ZAy))    )partial)CallableListOptionalSequenceTupleUnionNIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STD)	SelectAdaptivePool2dLinear	LayerTypePadType	RmsNorm2dConvNormActcreate_conv2dget_norm_act_layer	to_2tuple   )build_model_with_cfg)SqueezeExciteUniversalInvertedResidual)	BlockArgsEfficientNetBuilderdecode_arch_defefficientnet_init_weightsround_channelsresolve_act_layer)feature_take_indices)register_notrace_module)checkpoint_seq
checkpoint)generate_default_cfgsregister_modelMobileNetV5MobileNetV5Encoderc                        e Zd ZdZ	 	 	 	 	 	 ddeeee   f   dedededede	e   de
d	e	e   d
e	e   f fdZdeej                     dej                  fdZ xZS )"MobileNetV5MultiScaleFusionAdaptera  Multi-layer fusion token adapter.

  Args:
    in_chs: List of input channel counts for each feature scale.
    out_chs: The number of output channels.
    output_resolution: The output resolution.
    expansion_ratio: The FFN expansion ratio.
    interpolation_mode: The upsampling interpolation mode.
    layer_scale_init_value: The initial value of the layer scale, no layer scale if None.
  in_chsout_chsoutput_resolutionexpansion_ratiointerpolation_modelayer_scale_init_valuenoskip	act_layer
norm_layerc
           
         t         
|           t        |t              rt	        |      n|| _        || _        t        |      | _        || _	        || _
        || _        || _        |xs t        j                  }|	xs t        }	t!        | j
                  | j                  d| j                  ||	| j                  | j                        | _         |	| j                        | _        y )Nr   )r*   r+   dw_kernel_size_mid	exp_ratior1   r2   r0   r/   )super__init__
isinstancer   sumin_channelsout_channelsr   r,   r-   r.   r/   r0   nnGELUr   r   ffnnorm)selfr*   r+   r,   r-   r.   r/   r0   r1   r2   	__class__s             S/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/mobilenetv5.pyr7   z+MobileNetV5MultiScaleFusionAdapter.__init__&   s     
G&0&Bs6{DD&'89D*D0D"8DDK$RWWI(yJ(!!&&{{#::	DH 4,,-DI    inputsreturnc                    |d   j                   dd  }g }t        |      D ]]  \  }}|j                   dd  }|d   |d   k  s|d   |d   k  r"t        j                  ||| j                        }|j                  |       _ t        j                  |d      }| j                  |      }|d   | j                  d   k7  s|d   | j                  d   k7  r|d   | j                  d   z  dk7  s|d   | j                  d   z  dk7  r#t        j                  || j                  d      }nF|d   | j                  d   z  }|d   | j                  d   z  }	t        j                  |||	f||	f      }| j                  |      }|S )Nr   r   )sizemode)dimbilinear)kernel_sizestride)shape	enumerateFinterpolater.   appendtorchcatr>   r,   
avg_pool2dr?   )
r@   rD   high_resolutionresized_inputs_img	feat_sizechannel_cat_imgs	h_strides	w_stridess
             rB   forwardz*MobileNetV5MultiScaleFusionAdapter.forwardJ   s   Qioobc*ONF# #3IIbcN	Q</!,,	!q?Q0Q--/@W@WXCc"	# yyQ7
((#
$CqT33A66/!:LPTPfPfghPi:i A!7!7!::a?A!7!7!::a?--$*@*@zRC'*d.D.DQ.GGI'*d.D.DQ.GGI,,&	2!9-C ))C.CJrC   )g       @nearestNTNN)__name__
__module____qualname____doc__r	   intr   floatstrr   boolr   r7   rS   Tensorr^   __classcell__rA   s   @rB   r)   r)      s    	  "%"+26)-*.".c49n%". ". 	".
 ".  ". !)". ". I&". Y'".HD. 5<< rC   r)   c            )       F    e Zd ZdZdddddddd	dd
d
d
d
deddd
dfdedededededededede	e   dede
e   de
e   de
e   de
e   dedededede
e   d ef( fd!Zd" Zej"                  j$                  d9d#efd$       Zej"                  j$                  d:d%efd&       Zej"                  j$                  d'ej,                  fd(       Zd;ded efd)Z	 	 	 	 	 	 d<d*ej2                  d+e
eee	e   f      d,ed-ed.ed/ed0ed'ee	ej2                     eej2                  e	ej2                     f   f   fd1Z	 	 	 	 d=d+eee	e   f   d2ed3ed0efd4Zd*ej2                  d'ej2                  fd5Zd9d*ej2                  d6ed'ej2                  fd7Zd*ej2                  d'ej2                  fd8Z  xZ!S )>r&   z MobiletNet-V5
            F    T)rG   N        avg
block_argsnum_classesin_chans	stem_sizefix_stemnum_featurespad_typeuse_msfamsfa_indicesmsfa_output_resolutionr1   r2   aa_layerse_layerse_from_expround_chs_fn	drop_ratedrop_path_rater/   global_poolc                    t         |           |xs t        j                  }|xs t        }t        ||      }|xs t        }|| _        || _        d| _	        |	| _
        |
| _        |s ||      }t        ||dd|||      | _        t        d|||||||||
      }t        j                   |||       | _        |j"                  | _        | j$                  D cg c]  }|d   	 c}| _        |j(                  | _        |r|x| _        | _        t/        t1        | j$                        | j                        d   | _
        t3        | j                  D cg c]  }| j$                  |   d	    c}      | _        t7        | j4                  || j                  ||
      | _        t;        |      | _        d| _        d| _         n|j(                  | _        || _        d| _        t;        |      | _        | j*                  | j<                  jC                         z  }tE        || j,                  d|      | _         || j,                        | _         |rt        jF                  d      nt        jH                         | _%        |dkD  rtM        | j,                  |      nt        jH                         | _'        tQ        |        yc c}w c c}w )a  
        Args:
            block_args: Arguments for blocks of the network.
            num_classes: Number of classes for classification head.
            in_chans: Number of input image channels.
            stem_size: Number of output channels of the initial stem convolution.
            fix_stem: If True, don't scale stem by round_chs_fn.
            num_features: Number of output channels of the conv head layer.
            head_bias: If True, add a learnable bias to the conv head layer.
            pad_type: Type of padding to use for convolution layers.
            act_layer: Type of activation layer.
            norm_layer: Type of normalization layer.
            aa_layer: Type of anti-aliasing layer.
            se_layer: Type of Squeeze-and-Excite layer.
            se_from_exp: If True, calculate SE channel reduction from expanded mid channels.
            round_chs_fn: Callable to round number of filters based on depth multiplier.
            drop_rate: Dropout rate.
            drop_path_rate: Stochastic depth rate.
            layer_scale_init_value: Enable layer scale on compatible blocks if not None.
            global_pool: Type of pooling to use for global pooling features of the FC head.
        Frm      rL   rM   paddingr2   r1       
output_strider{   r   r   r1   r2   r   r   r   r/   stager   num_chsr*   r+   r,   r2   r1   	pool_typeNr   )r   ))r6   r7   r<   r=   r   r   r   rv   r   grad_checkpointingr}   r~   r   	conv_stemr   
Sequentialblocksfeaturesfeature_info
stage_endsr*   rz   head_hidden_sizer    lenr9   msfa_in_chsr)   msfar   r   	conv_head	norm_head	feat_multr   FlattenIdentityflattenr   
classifierr   )r@   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r1   r2   r   r   r   r   r   r   r/   r   norm_act_layerbuilderfminum_pooled_chsrA   s                             rB   r7   zMobileNetV5.__init__p   s   X 	(	,9
+J	B,}&""'(&<# $Y/I$!
 &%#!)#9
 mmWY
%CD#,,/3/@/@A!1W:A#NN 8DDD 5 4S9J9J5KTM^M^ _`a bD"tO`O`#aD$5$5b$9)$D#abD:''$"&"="=%#DI  4kJD!DN!DN 'D$0D!DI3kJD!..1A1A1K1K1MMN*>4;P;PRS]efDN+D,A,ABDN(3rzz!}HSVW&!6!6D]_]h]h]j!$'C B $bs   J=Kc                    | j                   | j                  g}|j                  | j                         |j	                  | j
                         | j                  |j	                  | j                         | j                  |j	                  | j                         |j                  t        j                         t        j                  | j                        | j                  g       t        j                  | S N)r   bn1extendr   rR   r   r   r   r<   r   Dropoutr   r   r   )r@   layerss     rB   as_sequentialzMobileNetV5.as_sequential   s    ..$((+dkk"d&&'>>%MM$..)>>%MM$..)rzz|RZZ%?QR}}f%%rC   coarsec                 .    t        d|rd      S d      S )Nz^conv_stem|bn1z^blocks\.(\d+)z^blocks\.(\d+)\.(\d+))stemr   )dict)r@   r   s     rB   group_matcherzMobileNetV5.group_matcher   s%    "(.$
 	
4L
 	
rC   enablec                     || _         y r   )r   )r@   r   s     rB   set_grad_checkpointingz"MobileNetV5.set_grad_checkpointing   s
    "(rC   rE   c                     | j                   S r   )r   )r@   s    rB   get_classifierzMobileNetV5.get_classifier   s    rC   c                    || _         t        |      | _        |rt        j                  d      nt        j
                         | _        |dkD  rt        | j                  |      | _	        y t        j
                         | _	        y )Nr   r   r   )
rv   r   r   r<   r   r   r   r   r   r   )r@   rv   r   s      rB   reset_classifierzMobileNetV5.reset_classifier   s[    &/+F(3rzz!}HSVW&!6!6D]_]h]h]jrC   xindicesr?   
stop_early
output_fmtintermediates_onlyextra_blocksc                 R   |dv sJ d       |r	|sJ d       g }|r&t        t        | j                        dz   |      \  }	}
nMt        t        | j                        |      \  }	}
|	D cg c]  }| j                  |    }	}| j                  |
   }
d}| j	                  |      }||	v r|j                  |       t        j                  j                         s|s| j                  }n| j                  d|
 }|D ]%  }|dz  } ||      }||	v s|j                  |       ' |r|S ||fS c c}w )aa   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
            extra_blocks: Include outputs of all blocks and head conv in output, does not align with feature_info
        Returns:

        NCHWOutput shape must be NCHW./Must use intermediates_only for early stopping.r   r   N)	r    r   r   r   r   rR   rS   jitis_scripting)r@   r   r   r?   r   r   r   r   intermediatestake_indices	max_indexifeat_idxr   blks                  rB   forward_intermediatesz!MobileNetV5.forward_intermediates  sB   . Y&D(DD&%X'XX%&:3t{{;Ka;OQX&Y#L)&:3t;OQX&Y#L)8DE1DOOA.ELE	2I NN1|#  #99!!#:[[F[[),F 	(CMHAA<'$$Q'		(   
 -; Fs   'D$
prune_norm
prune_headc                 p   |r&t        t        | j                        dz   |      \  }}n1t        t        | j                        |      \  }}| j                  |   }| j                  d| | _        |t        | j                        k  rd| _        d| _        |r d| _        d| _        | j                  dd       |S )z@ Prune layers not required for specified intermediates.
        r   Nr   rp   )r    r   r   r   r   r   r   )r@   r   r   r   r   r   r   s          rB   prune_intermediate_layersz%MobileNetV5.prune_intermediate_layersE  s     &:3t{{;Ka;OQX&Y#L)&:3t;OQX&Y#L)	2Ikk*9-s4;;''!DN!DN!DN!DN!!!R(rC   c                    | j                   d}g }| j                  |      }|| j                  v r|j                  |       | j                  D ]/  }|dz  } ||      }|| j                  v s|j                  |       1 | j                  |      }|S | j                  |      }| j
                  r8t        j                  j                         st        | j                  |d      }|S | j	                  |      }|S )Nr   r   T)r   )
r   r   r}   rR   r   r   rS   r   r   r"   r@   r   r   r   r   s        rB   forward_featureszMobileNetV5.forward_features]  s    99 HMq!A4,,,$$Q'{{ ,AFt000!((+, 		-(A  q!A&&uyy/E/E/G"4;;4@  KKNrC   
pre_logitsc                 Z   | j                  |      }| j                  | j                  |      }| j                  | j                  |      }| j                  |      }| j                  dkD  r,t        j                  || j                  | j                        }|r|S | j                  |      S )Nrs   )ptraining)	r   r   r   r   r   rP   dropoutr   r   )r@   r   r   s      rB   forward_headzMobileNetV5.forward_headt  s    Q>>%q!A>>%q!ALLO>>B		!t~~FAHq!!rC   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r@   r   s     rB   r^   zMobileNetV5.forward  s'    !!!$a rC   F)T)rt   NFFr   FF)r   FTF)"r`   ra   rb   rc   r   r   rd   rg   rf   r   r   r   r   re   r7   r   rS   r   ignorer   r   r<   Moduler   r   rh   r	   r   r   r   r   r   r^   ri   rj   s   @rB   r&   r&   l   s     $" $!&2*,-1.2,0,0 $%3!$&6:$+t(!t( t( 	t(
 t( t( t( t( t( s)t( %(t(  	*t( !+t( y)t( y)t(  !t(" ##t($ %t(& "'t(( %-UO)t(* +t(l	& YY
D 
 
 YY)T ) ) YY		  kC kc k 8<$$',!&< ||<  eCcN34<  	< 
 <  <  !%<  <  
tELL!5tELL7I)I#JJ	K< @ ./$#!&3S	>*  	
 0%,, 5<< ."ell " " " %,, rC   c            "           e Zd ZdZddddddddddd	ed
d
dfdededededede	e   dede
e   de
e   de
e   de
e   dedededede
e   f  fdZ	 	 	 	 	 	 d(dej                   de
eeee   f      deded ed!ed"ed#eeej                      eej                   eej                      f   f   fd$Zdej                   d#ej                   fd%Zdej                   d#ej                   fd&Zdej                   d#ej                   fd'Z xZS ))r'   zMobileNetV5 Vision Encoderrm   @   Frp   )rG   rr   rn   NTrs   ru   rw   rx   ry   r{   r}   r~   r1   r2   r   r   r   r   r   r   r/   c                 "   t         |           |xs t        j                  }|	xs t        }	|xs t
        }d| _        || _        d| _        |s ||      }t        ||dd||	|      | _
        t        d|||||	|
|||
      }t        j                   |||       | _        |j                  | _        | j                  D cg c]  }|d   	 c}| _        d	x| _        | _        t'        t)        | j                        |      d   | _        t-        | j*                  D cg c]  }| j                  |   d
    c}      | _        || _        t3        | j.                  | j"                  | j0                  |	|      | _        t7        |        y c c}w c c}w )Nr   Frm   r   r   r   r   r   ro   r   r   )r6   r7   r<   r=   r   r   rv   r   r   r   r   r   r   r   r   r   r   rz   r   r    r   r}   r9   r   r~   r)   r   r   )r@   ru   rw   rx   ry   r{   r}   r~   r1   r2   r   r   r   r   r   r   r/   r   r   r   rA   s                       rB   r7   zMobileNetV5Encoder.__init__  s   & 	(	,9
,}""' $Y/I$!
 &%#!)#9
 mmWY
%CD#,,/3/@/@A!1W:A488D10T5F5F1GVWXY4K\K\]R 1 1" 5i @]^&<#6##%%"99!
	 	"$'! B
  ^s    FFr   r   r?   r   r   r   r   rE   c                    ~|dv sJ d       |r	|sJ d       g }g }	|r&t        t        | j                        dz   |      \  }
}nMt        t        | j                        |      \  }
}|
D cg c]  }| j                  |    }
}| j                  |   }d}| j	                  |      }||
v r|j                  |       || j                  v r|	j                  |       t        j                  j                         s|s| j                  }n| j                  d| }|D ]D  }|dz  } ||      }||
v r|j                  |       || j                  v s4|	j                  |       F |r|S | j                  |	      |fS c c}w )al   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: (Unused) Applies norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
            extra_blocks: Include outputs of all blocks and head conv in output, does not align with feature_info
        Returns:

        r   r   r   r   r   N)r    r   r   r   r   rR   r}   rS   r   r   r   )r@   r   r   r?   r   r   r   r   r   msfa_intermediatesr   r   r   r   r   r   s                   rB   r   z(MobileNetV5Encoder.forward_intermediates  s   . Y&D(DD&%X'XX% &:3t{{;Ka;OQX&Y#L)&:3t;OQX&Y#L)8DE1DOOA.ELE	2I NN1|#  #t(((%%a(99!!#:[[F[[),F 	-CMHAA<'$$Q'4,,,"))!,	-   yy+,m;;7 Fs   *E4c                    d}g }| j                  |      }|| j                  v r|j                  |       | j                  D ]/  }|dz  } ||      }|| j                  v s|j                  |       1 | j	                  |      S )Nr   r   )r   r}   rR   r   r   r   s        rB   r   z#MobileNetV5Encoder.forward_features  s    NN1t(((  #;; 	(CMHAA4,,,$$Q'	( yy''rC   c                     t        d      )Nz=MobileNetV5Encoder does not support classification use cases.)NotImplementedErrorr   s     rB   r   zMobileNetV5Encoder.forward_head(  s    !"abbrC   c                 $    | j                  |      S r   )r   r   s     rB   r^   zMobileNetV5Encoder.forward+  s    $$Q''rC   r   )r`   ra   rb   rc   r   r   rd   rg   rf   r   r   r   r   re   r7   rS   rh   r	   r   r   r   r   r   r^   ri   rj   s   @rB   r'   r'     s   $
 "*2*,-1.2,0,0 $%3!$&6:#F(!F( F( 	F(
 F( F( #3-F( %(F(  	*F( !+F( y)F( y)F( F( #F( F(  "!F(" %-UO#F(V 8<$$',!&C<||C< eCcN34C< 	C<
 C< C< !%C< C< 
tELL!5tELL7I)I#JJ	KC<J(%,, (5<< ("cell cu|| c( (%,, (rC   variant
pretrainedrE   c                 t    |j                  dd      }t        |d      }d}t        t        | |fd||d|}|S )Nout_indicesr   r   r   rm      getterr   feature_cls)rv   rz   	head_conv	head_bias	head_normr   F)pretrained_strictfeature_cfgkwargs_filter)popr   r   r'   )r   r   kwargsr   r   r   models          rB   _create_mnv5_encoderr   /  s[    **]O<K;HEKM !  # E LrC   c                 n    |j                  dd      }t        |d      }t        t        | |fd|d|}|S )Nr   r   r   r   F)r   r   )r   r   r   r&   )r   r   r   r   r   r   s         rB   _create_mnv5r   F  sO    **]O<K;HEK    E LrC   channel_multiplierencoderc           	         d| v rg dg dg dg dg}ng dg dg dg dg}t        t        ||      d	|d
k  t        t        |      t        t
        j                  d      }t        |fi |}|rt        | |fi |}|S t        | |fi |}|S )Nmobilenetv5_base)er_r1_k3_s2_e4_c128er_r1_k3_s1_e4_c128r  )uir_r1_a3_k5_s2_e6_c256uir_r1_a5_k0_s1_e4_c256uir_r1_a3_k0_s1_e4_c256r  r	  )uir_r1_a5_k5_s2_e6_c512uir_r1_a5_k0_s1_e4_c512r  uir_r1_a0_k0_s1_e1_c512mqa_r1_k3_h8_s2_d64_c512uir_r1_a0_k0_s1_e2_c512r  r  r  r  r  r  r  r  r  r  )uir_r1_a5_k5_s2_e6_c1024mqa_r1_k3_h16_s1_d64_c1024uir_r1_a0_k0_s1_e2_c1024r  r  r  r  r  r  r  r  r  r  r  r  )%uir_r1_a5_k5_s2_e6_c640uir_r1_a5_k0_s1_e4_c640r  r  r  r  r  r  uir_r1_a0_k0_s1_e1_c640mqa_r1_k3_h12_v2_s1_d64_c640uir_r1_a0_k0_s1_e2_c640r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )'uir_r1_a5_k5_s2_e6_c1280mqa_r1_k3_h16_s1_d96_c1280uir_r1_a0_k0_s1_e2_c1280r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  )
group_sizer         ?)
multipliergh㈵>)ru   rx   ry   r   r2   r1   r/   )	r   r   r   r   r   r<   r=   r   r   )	r   r  r  r   r  r   arch_defmodel_kwargsr   s	            rB   _gen_mobilenet_v5r  T  s     W$&G4%
p&P(qa%
F "8
C#c)^8JK''#L //L$WjILI L WjALALrC   urlc                 0    | dddddt         t        ddd
|S )	Nrl   )rm      r"  )rn   rn   r  bicubiczconv_stem.convr   )
r   rv   
input_size	pool_sizecrop_pctinterpolationmeanstd
first_convr   r
   )r   r   s     rB   _cfgr+    s0    4}S[)'0F&l	
  rC   )rm      r,  )r$  rv   rl   )rv   )mobilenetv5_300m_encmobilenetv5_300mzmobilenetv5_base.untrainedc                 J    |j                  dd      }t        	 d| d|d|}|S )zMobileNet V5 Vision Encoderr{   sameT)r   r  r{   )r-  )r   r  )r   r   r{   r   s       rB   r-  r-  #  s@     zz*f-H	
 E LrC   c                      t        dd| i|}|S )Nr   )r.  r  r   r   r   s      rB   r.  r.  1      RZR6RELrC   c                      t        dd| i|}|S )Nr   )r  r2  r3  s      rB   r  r  7  r4  rC   r   )r  NFF)rp   )B	functoolsr   typingr   r   r   r   r   r	   rS   torch.nnr<   torch.nn.functional
functionalrP   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   _builderr   _efficientnet_blocksr   r   _efficientnet_builderr   r   r   r   r   r   	_featuresr    _features_fxr!   _manipulater"   r#   	_registryr$   r%   __all__r   r)   r&   r'   rf   rg   r   r   re   r  r+  default_cfgsr-  r.  r   rC   rB   <module>rG     s    C C     E   + J& & + 1 3 <.
/ O O OdX")) Xve( e(P# 4 N` .# 4 FX   %( pp!p 	p
 p pfc  %     #'#& $ 
T 
@R 
 
  K  
  K  rC   