
    kh              
          d Z ddlZddlmZ ddlmZmZmZmZm	Z	m
Z
 ddlZddlmZ ddlmc mZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlm Z m!Z! dd	l"m#Z# dd
l$m%Z% ddl&m'Z' ddl(m)Z)m*Z* dgZ+de,de	ejZ                     fdZ.e'dee,   dej^                  dej^                  fd       Z0dej^                  dee,   dee,   dej^                  fdZ1 G d dejZ                        Z2 G d dejZ                        Z3 G d dejZ                        Z4 G d dejZ                        Z5 G d  d!ejZ                        Z6 G d" dejZ                        Z7dKd#Z8 e i d$ e8d%d&'      d( e8d%d&d)      d* e8d%d&'      d+ e8d%d&d)      d, e8d%d&'      d- e8d%d&d)      d. e8d%d&'      d/ e8d%d&d)      d0 e8d%d&'      d1 e8d%d&d)      d2 e8d%d&'      d3 e8d%d&d)      d4 e8d%d5d67      d8 e8d%d5d67      d9 e8d%d:d5d6;      d< e8d%d:d5d6;      d= e8d5d6>            Z9dLd?Z:dMd@e;dAe<de7fdBZ=e!dMdC       Z>e!dMdD       Z?e!dMdE       Z@e!dMdF       ZAe!dMdG       ZBe!dMdH       ZCe!dMdI       ZDe!dMdJ       ZEy)Nzr An PyTorch implementation of Hiera

Adapted for timm from originals at https://github.com/facebookresearch/hiera
    N)partial)DictListOptionalTupleTypeUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
DropPathMlp
LayerScaleClNormMlpClassifierHeaduse_fused_attn_assertget_norm_layer	to_2tupleinit_weight_vitinit_weight_jax   )generate_default_cfgsregister_model)build_model_with_cfg)feature_take_indices)register_notrace_function)named_apply
checkpointHieranreturnc                     t         j                  t         j                  t         j                  t         j                  g|    S )z
    Returns a conv with nd (e.g., Conv2d for n=2). Work up to n=3.
    If you wanted a 4d Hiera, you could probably just implement this for n=4. (no promises)
    )nnIdentityConv1dConv2dConv3d)r    s    M/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/hiera.pyconv_ndr)   /   s(    
 KKBIIryy9!<<    target_sizemaskc                     ||S t        t        |j                  dd        t        |       k(  d       |j                  dd  | k7  r%t        j                  |j                         |       S |S )N   z.mask spatial shape and target_size must match.)size)r   lenshapeFinterpolatefloatr+   r,   s     r(   get_resized_maskr6   7   s`     |C

123{#335efzz!"~$}}TZZ\<<Kr*   xr1   mu_shapec                    t        |      }| j                  d   | j                  d   }}t        ||      D cg c]
  \  }}||z   }}} | j                  |g||| } dgt	        t        t        dd|z         t        d|z   dd|z  z               D 	cg c]  }	t        |	       c}	g       z   t        | j                        dz
  gz   }
 | j                  |
      j                  |g|| } | S c c}}w c c}	w )a  
    Restore spatial organization by undoing windowed organization of mask units.

    Args:
        x: organized by mask units windows, e.g. in 2d [B, #MUy*#MUx, MUy, MUx, C]
        shape: current spatial shape, if it were not organized into mask unit
            windows, e.g. in 2d [B, #MUy*MUy, #MUx*MUx, C].
        mu_shape: current mask unit shape, e.g. in 2d [MUy, MUx]
    Returns:
        x: e.g. in 2d, [B, #MUy*MUy, #MUx*MUx, C]
    r   r   r.   )	r0   r1   zipviewsumrangelistpermutereshape)r7   r1   r8   DBCsmunum_MUspr@   s              r(   undo_windowingrI   D   s     	E
A771:qwwr{qA$'x$8951bqBw9G9q)7)X)q)A 

E!QUOU1q5!a!e)5L MN1tAwNPR
S	Tqww<!
	 
 	#		'""10u0a0AH : Os   C3C9
c            	            e Zd ZdZdeedf   deedf   deeedf      f fdZdej                  dej                  fd	Z
 xZS )
Unrolla>  
    Reorders the tokens such that patches are contiguous in memory.
    E.g., given [B, (H, W), C] and stride of (Sy, Sx), this will re-order the tokens as
                           [B, (Sy, Sx, H // Sy, W // Sx), C]

    This allows operations like Max2d to be computed as x.view(B, Sx*Sy, -1, C).max(dim=1).
    Not only is this faster, but it also makes it easy to support inputs of arbitrary
    dimensions in addition to patch-wise sparsity.

    Performing this operation multiple times in sequence puts entire windows as contiguous
    in memory. For instance, if you applied the stride (2, 2) 3 times, entire windows of
    size 8x8 would be contiguous in memory, allowing operations like mask unit attention
    computed easily and efficiently, while also allowing max to be applied sequentially.

    Note: This means that intermediate values of the model are not in HxW order, so they
    need to be re-rolled if you want to use the intermediate values as a HxW feature map.
    The last block of the network is fine though, since by then the strides are all consumed.
    
input_size.patch_strideunroll_schedulec                     t         |           t        ||      D cg c]
  \  }}||z   c}}| _        || _        y c c}}w N)super__init__r;   r/   schedule)selfrL   rM   rN   irE   	__class__s         r(   rR   zUnroll.__init__y   s>     	(+J(EF1Q!VF	' Gs   ?r7   r!   c           
         |j                   \  }}}| j                  } |j                  |g|z   |gz    }| j                  D ]  }t	        ||      D cg c]
  \  }}||z   }}}|gt        t	        ||      D cg c]	  \  }}||g c}}g       z   |gz   }	|j                  |	      }t        |	      }
dgt        t        d|
dz
  d            z   t        t        d|
dz
  d            z   |
dz
  gz   }|j                  |      }|j                  dt        |            }|t        j                  |      z  } |j                  dt        j                  | j                        |      }|S c c}}w c c}}w )z
        Input: Flattened patch embeddings [B, N, C]
        Output: Patch embeddings [B, N, C] permuted such that [B, 4, N//4, C].max(1) etc. performs MaxPoolNd
        r   r.   r   r:   )r1   r/   r<   rS   r;   r=   r0   r?   r>   r@   flattenmathprodrA   )rT   r7   rC   _rD   cur_sizestridesrU   rE   	new_shapeLr@   s               r(   forwardzUnroll.forward   si   
 ''1a99AFFaS8^qc)+}} 	$G
 ,/x+AB41aQBHBcc(G6L"MdaAq6"MrRRVWUXXIy!A IAcDq!a%!344tE!QUA<N7OOSTWXSXRYYG		'"A 		!S\*A7##A#	$& IIb$))DII.2 C"Ms   E):E/__name__
__module____qualname____doc__r   intr   rR   torchTensorr`   __classcell__rV   s   @r(   rK   rK   e   sa    &(c3h(  S/( "%S/2	( %,, r*   rK   c            
            e Zd ZdZdeedf   deedf   deeedf      dee   def
 fdZ	 dd	ej                  d
edej                  dej                  fdZ
 xZS )RerollzQ
    Undos the "unroll" operation so that you can use intermediate features.
    rL   .rM   rN   
stage_endsq_poolc                 z   t         
|           t        ||      D cg c]
  \  }}||z   c}}| _        i | _        | j                  }t        |d   dz         D ]R  }||f| j                  |<   ||d | v st        |      dkD  r$t        ||d         D 	cg c]
  \  }	}|	|z   }}	}|dd  }T y c c}}w c c}}	w )Nr:   r   r   )rQ   rR   r;   r/   rS   r>   r0   )rT   rL   rM   rN   rm   rn   rU   rE   r/   r    rV   s             r(   rR   zReroll.__init__   s     	(+J(EF1Q!VF	 yyz"~)* 	6A.4DMM!Jw'''!+/249K/LMtq!AFMDM"1!""5	6 G Ns   B1B7r7   	block_idxr,   r!   c                    | j                   |   \  }}|j                  \  }}}t        |      }	dg|	z  }
|D ]  } |j                  |g||t	        j
                  |      z  |
| }t        |j                        }dd|	z   gt        t        t        dd|	z         t        d|	z   dz   |dz
              D cg c]  }t        |       c}g       z   |dz
  gz   }|j                  |      }t        |	      D ]  }|
|xx   ||   z  cc<     |j                  |dg|
| }|j                  d   }  |j                  ||g|
| }||S t        |||
      }|S c c}w )a&  
        Roll the given tensor back up to spatial order assuming it's from the given block.

        If no mask is provided:
            - Returns [B, H, W, C] for 2d, [B, T, H, W, C] for 3d, etc.
        If a mask is provided:
            - Returns [B, #MUs, MUy, MUx, C] for 2d, etc.
        r   r   r:   )rS   r1   r0   r<   rY   rZ   r=   r;   r>   r?   r@   rA   rI   )rT   r7   rp   r,   rS   r/   rC   NrD   rB   cur_mu_shaper]   r_   rH   r@   rU   s                   r(   r`   zReroll.forward   s    y1$''1aIsQw 	GqN7NA7);$;NlNANA AGGAAE
E!QUOU1q519aRSe=T(UV1tAwVXZ[\q5' 
 		'"A 1X .Q71:-.		!R2,22A
A%	* AFF1a*,** H 1dL1+ Ws   6ErP   ra   rj   s   @r(   rl   rl      s    6c3h6  S/6 "%S/2	6
 S	6 66 "&	2||2 2 ,,	2
 
2r*   rl   c                        e Zd ZU dZej
                  j                  e   ed<   	 	 	 dde	de	de	de	de	def fd	Z
d
ej                  dej                  fdZ xZS )MaskUnitAttentionz
    Computes either Mask Unit or Global Attention. Also is able to perform q pooling.

    Note: this assumes the tokens have already been flattened and unrolled into mask units.
    See `Unroll` for more details.
    
fused_attndimdim_outheadsq_stridewindow_sizeuse_mask_unit_attnc                 B   t         |           || _        || _        || _        || _        ||z  | _        | j                  dz  | _        t               | _	        t        j                  |d|z        | _        t        j                  ||      | _        || _        || _        y)a  
        Args:
        - dim, dim_out: The input and output feature dimensions.
        - heads: The number of attention heads.
        - q_stride: If greater than 1, pool q with this stride. The stride should be flattened (e.g., 2x2 = 4).
        - window_size: The current (flattened) size of a mask unit *after* pooling (if any).
        - use_mask_unit_attn: Use Mask Unit or Global Attention.
        g         N)rQ   rR   rw   rx   ry   rz   head_dimscaler   rv   r#   Linearqkvprojr{   r|   )rT   rw   rx   ry   rz   r{   r|   rV   s          r(   rR   zMaskUnitAttention.__init__   s    " 	
 5(]]d*
(*99S!g+.IIgw/	&"4r*   r7   r!   c                    |j                   \  }}}| j                  r|| j                  | j                  z  z  nd}| j	                  |      j                  |d|d| j                  | j                        j                  dddddd      }|j                  d      \  }}}	| j                  dkD  rD|j                  || j                  || j                  d| j                        j                  d      }| j                  rt        j                  |||	      }n9|| j                  z  |j!                  dd	      z  }
|
j#                  d      }
|
|	z  }|j!                  dd      j                  |d| j$                        }| j'                  |      }|S )
z5 Input should be of shape [batch, tokens, channels]. r   r:   r~   r      r.      rw   )r1   r|   rz   r{   r   rA   ry   r   r@   unbindr<   amaxrv   r2   scaled_dot_product_attentionr   	transposesoftmaxrx   r   )rT   r7   rC   rr   r[   num_windowsr   qkvattns              r(   r`   zMaskUnitAttention.forward  sR   ''1aCGCZCZqT]]T-=-==>`ahhqk!!!RaT]]S[[\]_`bcefhiklm**Q-1a==1q$**k4=="dmmTYY^_Y`A??..q!Q7A

Nakk"b&99D<<B<'DqAKK1%%aT\\:IIaLr*   )r   r   F)rb   rc   rd   re   rg   jitFinalbool__annotations__rf   rR   rh   r`   ri   rj   s   @r(   ru   ru      s     		%%  ',55 5 	5
 5 5 !%5B %,, r*   ru   c                        e Zd Zdddej                  ej
                  ddddf	ded	ed
edededee   de	ej                     de	ej                     dedededef fdZdej                  dej                  fdZ xZS )
HieraBlock      @        Nr   r   TFrw   rx   ry   	mlp_ratio	drop_pathinit_values
norm_layer	act_layerrz   r{   use_expand_projr|   c                    t         |           || _        || _         ||      | _        ||k7  r7d| _        |rt        j                  ||      | _        n ||dz  k(  sJ d | _        nd| _        d | _        t        ||||	|
|      | _
        |t        ||      nt        j                         | _        |dkD  rt        |      nt        j                         | _         ||      | _        t#        |t%        ||z        |      | _        |t        ||      nt        j                         | _        |dkD  rt        |      | _        y t        j                         | _        y )NTr.   F)r   r   )r   )rQ   rR   rw   rx   norm1	do_expandr#   r   r   ru   r   r   r$   ls1r   
drop_path1norm2r   rf   mlpls2
drop_path2)rT   rw   rx   ry   r   r   r   r   r   rz   r{   r   r|   rV   s                r(   rR   zHieraBlock.__init__7  s1    	_
'>!DNIIc73	#'))) 	"DNDI%
	 DOCZ:g;?`b`k`k`m1:Q(9-BKKM(
wGi$7 8INCNCZ:g;?`b`k`k`m1:Q(9-BKKMr*   r7   r!   c           
      z   | j                  |      }| j                  r)| j                  d| j                  |      }|j                  |j                  d   | j
                  j                  d|j                  d         j                  d      }nt        j                  |j                  |j                  d   | j
                  j                  d|j                  d         j                  d      |j                  |j                  d   | j
                  j                  d|j                  d         j                  d      gd      }|| j                  | j                  | j                  |                  z   }|| j                  | j                  | j                  | j!                  |                        z   }|S )Nr   r:   r   r   )r   r   r   r<   r1   r   rz   r   rg   catmeanr   r   r   r   r   r   )rT   r7   x_norms      r(   r`   zHieraBlock.forwarde  sT   A>>yy$IIf%FF1771:tyy'9'92qwwr{KPPUVPWIIFF1771:tyy'9'92qwwr{KPPUVPWFF1771:tyy'9'92qwwr{KPPUVPW 	 6): ;<< $**Q-)@ ABBr*   )rb   rc   rd   r#   	LayerNormGELUrf   r4   r   r   Moduler   rR   rg   rh   r`   ri   rj   s   @r(   r   r   6  s      #"+/*,,,)+ $(',,R,R ,R 	,R
 ,R ,R "%,R RYY,R BII,R ,R ,R ",R !%,R\ %,, r*   r   c                        e Zd ZdZ	 ddededeedf   deedf   deedf   def fd	Z	 dd
ej                  de
ej                     dej                  fdZ xZS )
PatchEmbedzHPatch embed that supports any number of spatial dimensions (1d, 2d, 3d).dim_inrx   kernel.stridepaddingrA   c                     t         |           t        |      | _        || _         t        | j                        |||||      | _        y )N)kernel_sizer   r   )rQ   rR   r0   spatial_dimsrA   r)   r   )rT   r   rx   r   r   r   rA   rV   s          r(   rR   zPatchEmbed.__init__}  sM     	  K.GD--.
	r*   r7   r,   r!   c                 V   |Lt        |j                  dd  |      }| j                  ||j                  t        j
                        z        }n| j                  |      }| j                  r=|j                  |j                  d   |j                  d   d      j                  dd      }|S )Nr.   r5   r   r   r:   )r6   r1   r   torg   r   rA   r   rT   r7   r,   s      r(   r`   zPatchEmbed.forward  s    
 #$GD		!dggejj112A		!A<<		!''!*aggaj"5??1EAr*   TrP   )rb   rc   rd   re   rf   r   r   rR   rg   rh   r   r`   ri   rj   s   @r(   r   r   z  s    R !

 
 #s(O	

 #s(O
 38_
 
2 ,0|| 5<<( 
	r*   r   c            =           e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d>deedf   dededededed	eedf   d
edeedf   deedf   deedf   dedededeedf   deedf   deedf   dededee   dedede	ee
j                  f   dedededededeeef   f: fd Zd! Zej                  j                   d"        Zej                  j                   d?d#ed$efd%       Zej                  j                   d@d&ed$dfd'       Zej                  j                   d(        ZdAdedee   d)efd*Zd+ej.                  d,ed$ej.                  fd-Zd$ej.                  fd.Z	 	 	 	 	 	 	 dBd+ej.                  d/eej.                     d0ee	eee   f      d1ed2ed3ed4ed#ed$e	eej.                     eej.                  eej.                     f   f   fd5Z	 	 	 	 dCd0e	eee   f   d6ed7ed#efd8Z	 	 dAd+ej.                  d/eej.                     d9ed$ej.                  fd:Zd?d;ed$ej.                  fd<Z	 dDd+ej.                  d/eej.                     d$ej.                  fd=Z xZ S )Er   Nimg_size.in_chans	embed_dim	num_headsnum_classesglobal_poolstagesrn   rz   mask_unit_sizemask_unit_attnr   dim_mulhead_mulpatch_kernelrM   patch_paddingr   drop_path_rater   fix_initweight_initr   	drop_ratepatch_drop_ratehead_init_scalesep_pos_embedabs_win_pos_embedglobal_pos_sizec                    t         ,|           || _        d| _        t	        |      }t        |t              rt        |      }|| _        t        ||      D cg c]
  \  }}||z   c}}| _
        t        j                  | j                        } t        j                  |
      }!t        j                  |	      }"|t        |      k  sJ ||	c| _        | _        |!|
c| _        | _        t        | j                  | j"                        D cg c]
  \  }}||z   c}}| _        t'        dt        |      dz         D cg c]  }t)        |d |       dz
   c}| _        || _        t/        |||||      | _        d | _        d | _        d | _        d | _        |rt;        j<                  t?        j@                  d| j                  d   | j                  d   z  |            | _        t;        j<                  t?        j@                  d| j                  d   |            | _        n|r_t;        j<                  t?        j@                  d|g|       | _        t;        j<                  t?        j@                  d|g|
       | _        n/t;        j<                  t?        j@                  d| |            | _        tC        |||	gt        | j*                  d d       z        | _"        tG        |||	gt        | j*                  d d       z  | j*                  |      | _$        | j*                  d | D #cg c]  }#|#dz   	 }$}#d}%t)        |      }&t?        jJ                  d||&      D #cg c]  }#|#jM                          }'}#t;        jN                         | _(        g | _)        t'        |&      D ]  }|}(||%   })|dz
  | j*                  v r*t        ||z        }(t        ||z        }|%dz  }%||$v r|!|"z  }!tU        ||(|||'|   ||||$v r|"nd|!||)      }*|(}|| j*                  v r8| xjR                  tW        |(d|%dz   z  d| j*                  |%          gz  c_)        | jP                  jY                  |*        |x| _-        | _.        t_        |||||d	
      | _0        |rWt:        jb                  je                  | j6                  d       t:        jb                  je                  | j8                  d       nn| j2                  +t:        jb                  je                  | j2                  d       | j4                  +t:        jb                  je                  | j4                  d       |dk7  r*|dk(  rtf        nth        }+tk        |+d      }+tm        |+|        |r| jo                          t        | j`                  jp                  t:        jr                        rs| j`                  jp                  jt                  jv                  jy                  |       | j`                  jp                  jz                  jv                  jy                  |       y y c c}}w c c}}w c c}w c c}#w c c}#w )NFr   r.   r   r:   )rw   rx   ry   r   r   r   r   rz   r{   r   r|   zblocks.)num_chs	reductionmoduleNLC)	pool_typer   r   	input_fmtg{Gz?)stdskipjaxhead.fc)classifier_name)>rQ   rR   r   grad_checkpointingr   
isinstancerf   r   rM   r;   tokens_spatial_shaperY   rZ   r0   rn   rz   mu_sizer   mask_spatial_shaper>   r=   rm   r   r   patch_embed	pos_embedpos_embed_winpos_embed_spatialpos_embed_temporalr#   	Parameterrg   zerosrK   unrollrl   rerolllinspaceitem
ModuleListblocksfeature_infor   dictappendnum_featureshead_hidden_sizer   headinittrunc_normal_r   r   r   r   fix_init_weightfcr   weightdatamul_bias)-rT   r   r   r   r   r   r   r   rn   rz   r   r   r   r   r   r   rM   r   r   r   r   r   r   r   r   r   r   r   r   r   rU   rE   
num_tokensflat_mu_sizeflat_q_strider7   q_pool_blocks	cur_stagedepthdprrx   r|   blockinit_fnrV   s-                                               r(   rR   zHiera.__init__  s   B 	&"'#J/
h$ *H(8;Hl8S$T1Q!V$T!YYt889
yy0		(+F###%+X"T],8.)d)69$:S:SUYUhUh6i"jda16"j8=aVq8QR13vbqz?Q.R.%
 26599=:>%'\\At88;d>W>WXY>ZZ\ef&D" ')llAt88;YG'D# !!#ekk!Y.Y.Y!Z%'\\%++a2\^2\%]"!#ekk!Z.S!T JT__Sb122

 JT__Sb122OO
 )-(@A1QAA 	F!&>5!IJAqvvxJJmmou 	&AG "0	!:1u'i'12	H 45	Q	% ]2L#a&'%+,+=-1( /#5E  IDOO#!!A	!4DwW[WfWfgpWqVrMst&v v!KKu%?	&B 5>=D1+!!
	 GG!!$"8"8d!CGG!!$"9"9t!D~~)%%dnn$%?!!-%%d&8&8d%C& )4)=o?GgyAG&  "diillBII.IILL$$))/:IILL""''8 /o %U #kRX B
 Ks   X4X:>Y %YY
c                    d }t        | j                        D ]m  \  }} ||j                  j                  j                  j
                  |dz           ||j                  j                  j                  j
                  |dz          o y )Nc                 R    | j                  t        j                  d|z               y )N       @)div_rY   sqrt)param	_layer_ids     r(   rescalez&Hiera.fix_init_weight.<locals>.rescaleI  s    JJtyyy12r*   r   )	enumerater   r   r   r   r   r   fc2)rT   r  layer_idlayers       r(   r   zHiera.fix_init_weightH  si    	3  )5 	=OHeEJJOO**//A>EIIMM((--x!|<	=r*   c                 H    | j                   dgS | j                  ddgS ddgS )Nr   pos_embed_absr   r   r   )r   r  rT   s    r(   no_weight_decayzHiera.no_weight_decayP  s7    >>%= +#_55')=>>r*   coarser!   c                      t        dddg      S )NzW^pos_embed|pos_embed_spatial|pos_embed_temporal|pos_embed_abs|pos_embed_win|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )rT   r  s     r(   group_matcherzHiera.group_matcherY  s    k-/CD
 	
r*   enablec                     || _         y rP   )r   )rT   r  s     r(   set_grad_checkpointingzHiera.set_grad_checkpointing`  s
    "(r*   c                 .    | j                   j                  S rP   )r   r   r  s    r(   get_classifierzHiera.get_classifierd  s    yy||r*   reset_otherc                 N    || _         | j                  j                  |||       y )Nr#  )r   r   reset)rT   r   r   r#  s       r(   reset_classifierzHiera.reset_classifierh  s     &		[kJr*   r7   
mask_ratioc                    |j                   d   }t        j                  | j                        }t	        |d|z
  z        }t        j                  |||j                        }t        j                  |d      }t        j                  |d      }t        j                  ||g|j                        }	d|	ddd|f<   t        j                  |	d|      }	|	j                         S )z
        Generates a random mask, mask_ratio fraction are dropped.
        1 is *keep*, 0 is *remove*. Useful for MAE, FLIP, etc.
        r   r   )devicer   N)rw   index)r1   rY   rZ   r   rf   rg   randr*  argsortr   gatherr   )
rT   r7   r(  rC   r   len_keepnoiseids_shuffleids_restorer,   s
             r(   get_random_maskzHiera.get_random_maskl  s    
 GGAJii 7 78{a*n56

1k!((; mmEq1mmKQ7 {{A{+AHH=Q		\||Da{;yy{r*   c                 &   | j                   || j                   j                  | j                        }t        j                  | j
                  |j                  dd  dd      }||z   }|j                  d      j                  dd      }n| j
                  | j
                  }nj| j                  j                  d| j                  d   d      t        j                  | j                  | j                  d   | j                  d   z  d      z   }||z   }|S )	Nr   bicubicT)r/   mode	antialiasr.   r   r   r   )r   tiler   r2   r3   r   r1   rX   r   r   repeatr   rg   repeat_interleaver   )rT   r7   r   r   s       r(   
_pos_embedzHiera._pos_embed  s
   ) !..33D4K4KLM"((-	I "M1I!))!,66q!<I^^'I &&--a1J1J11MqQ''++--a043L3LQ3OO  	Mr*   r,   indicesnorm
stop_early
output_fmtintermediates_onlyc	           	      X   |rJ d       |dv sJ d       |rNt        t        | j                        |      \  }	}
|	D cg c]  }| j                  |    }	}| j                  |
   }
n"t        t        | j                        |      \  }	}
|, |j                  |j
                  d   dg| j                   }nd}| j                  ||      }| j                  |      }| j                  |      }|[||d   j                  d| j                  |j
                  d	            j	                  |j
                  d   d
|j
                  d
         }g }t        j                  j                         s|s| j                  }n| j                  d|
dz    }t        |      D ]  \  }}| j                   r+t        j                  j                         st#        ||      }n ||      }||	v sJ| j%                  |||      }|j'                  |dk(  r|j)                  dddd	      n|        |r|S ||fS c c}w )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        z'normalization of features not supported)NCHWNHWCz(Output format must be one of NCHW, NHWC.Nr   r   r,   .Nr.   r:   rB  r~   )r   r0   rm   r   r<   r1   r   r   r;  r   r8  r   rg   r   is_scriptingr  r   r   r   r   r@   )rT   r7   r,   r<  r=  r>  r?  r@  r  take_indices	max_indexrU   
patch_maskintermediatesr   blkx_ints                    r(   forward_intermediateszHiera.forward_intermediates  s   . BBBx--Y/YY-&:3t;OQX&Y#L)8DE1DOOA.ELE	2I&:3t{{;KW&U#L)"1771:qK43J3JKJJQZ0OOAKKN $y/&&q$,,
CDII!''RS*VXZ[ZaZabdZefA99!!#:[[F[[)a-0F' 	cFAs&&uyy/E/E/GsA&FL Aqt4$$*PVBVU]]1aA%>\ab	c   -E Fs   H'
prune_norm
prune_headc                    |r2t        t        | j                        |      \  }}| j                  |   }n"t        t        | j                        |      \  }}| j                  d|dz    | _        |r| j                  j                  dd       |S )z@ Prune layers not required for specified intermediates.
        Nr   r   Tr%  )r   r0   rm   r   r   r&  )rT   r<  rN  rO  r  rG  rH  s          r(   prune_intermediate_layerszHiera.prune_intermediate_layers  s}     &:3t;OQX&Y#L)	2I&:3t{{;KW&U#L)kk.9q=1IIOOA4O0r*   return_intermediatesc                 4   | j                   r0| j                  dkD  r!|J | j                  || j                        }|, |j                  |j                  d   dg| j
                   }nd}| j                  ||      }| j                  |      }| j                  |      }|[||d   j                  d| j                  |j                  d            j                  |j                  d   d|j                  d         }g }t        | j                        D ]y  \  }}| j                  r+t        j                  j!                         st#        ||      }n ||      }|sH|| j$                  v sW|j'                  | j)                  |||             { |r||fS |S )	z
        mask should be a boolean tensor of shape [B, #MUt*#MUy*#MUx] where #MU are the number of mask units in that dim.
        Note: 1 in mask is *keep*, 0 is *remove*; mask.sum(dim=-1) should be the same across the batch.
        r   N)r(  r   rD  rE  r.   r:   )trainingr   r3  r<   r1   r   r   r;  r   r8  r   r  r   r   rg   r   rF  r   rm   r   r   )rT   r7   r,   rR  rI  rJ  rU   rK  s           r(   forward_featureszHiera.forward_features  s    ==T11A5<<''d6J6J'KD"1771:qK43J3JKJJQZ0OOAKKN $y/&&q$,,
CDII!''RS*VXZ[ZaZabdZefA, 	CFAs&&uyy/E/E/GsA&F#T__(<$$T[[AD[%AB	C  m##r*   
pre_logitsc                 V    |r| j                  ||      }|S | j                  |      }|S )N)rV  )r   )rT   r7   rV  s      r(   forward_headzHiera.forward_head  s1    3=DIIaJI/ DH99Q<r*   c                 R    | j                  ||      }|| j                  |      }|S )NrD  )rU  rX  r   s      r(   r`   zHiera.forward#  s3    
 !!!$!/<!!!$Ar*   ))   rZ  r~   `   r     avgr.   r~      r~   r~   )r.   r.   )   r`  )TTFFTr  r  )   ra  )r   r   )r~   r~   r   r   NT r   r   r   gMbP?FF)   rc  Fr   )NF)NNFTrB  FT)r   FTTrP   )!rb   rc   rd   r   rf   strr   r4   r   r	   r#   r   rR   r   rg   r   ignorer  r   r  r   r"  r'  rh   r3  r;  r   rM  rQ  rU  rX  r`   ri   rj   s   @r(   r   r     s    )3#$&3(..4/I$( !,2,2-3"$'+/!!0;"%(%*"'&+/7?b9CHob9 b9 	b9
 b9 b9 b9 #s(Ob9 b9 CHob9 "#s(Ob9 "$),b9 "b9 b9  !b9"  S/#b9$  S/%b9& !c?'b9( )b9* "+b9, "%-b9. /b90 1b92 c299n-3b94 5b96 #7b98 #9b9:  ;b9<  $=b9> #38_?b9H= YY? ? YY
D 
T 
 
 YY)T )T ) ) YY KC Khsm Kae K 5 U\\ 0u|| > ,07;#$',= ||=  5<<(=  eCcN34	= 
 =  =  =  !%=  =  
tELL!5tELL7I)I#JJ	K= B ./$#3S	>*  	
 , ,0).	+||+ 5<<(+ #'	+
 
+Z$ 5<<  ,0|| 5<<( 
	r*   c                 2    | ddd dddt         t        ddd|S )	Nr\  )r~   rZ  rZ  g?r5  Tzpatch_embed.projr   )urlr   rL   	pool_sizecrop_pctinterpolationfixed_input_sizer   r   
first_conv
classifierr
   )rh  kwargss     r(   _cfgrp  .  s2    =t%.B(	  r*   zhiera_tiny_224.mae_in1k_ft_in1kztimm/zcc-by-nc-4.0)	hf_hub_idlicensezhiera_tiny_224.mae)rq  rr  r   z hiera_small_224.mae_in1k_ft_in1kzhiera_small_224.maezhiera_base_224.mae_in1k_ft_in1kzhiera_base_224.maez$hiera_base_plus_224.mae_in1k_ft_in1kzhiera_base_plus_224.maez hiera_large_224.mae_in1k_ft_in1kzhiera_large_224.maezhiera_huge_224.mae_in1k_ft_in1kzhiera_huge_224.maez.hiera_small_abswin_256.sbb2_e200_in12k_ft_in1k)r~      rs  gffffff?)rq  rL   rj  z1hiera_small_abswin_256.sbb2_pd_e200_in12k_ft_in1kz&hiera_small_abswin_256.sbb2_e200_in12ki-.  )rq  r   rL   rj  z)hiera_small_abswin_256.sbb2_pd_e200_in12kzhiera_base_abswin_256.untrained)rL   rj  c                 0   | j                  d|       } i }| j                         D ]n  \  }}d|v r|j                  dd      }|j                  d      r|j                  dd      }n#|j                  d      r|j                  dd      }|dk(  rd}|||<   p |S )	Nmodel_statezhead.projection.zhead.fc.zencoder_norm.z
head.norm.znorm.r  r   )getitemsreplace
startswith)
state_dictmodeloutputr   r   s        r(   checkpoint_filter_fnr}    s    z:JF  " 1 "		,j9A<<(		/<8A\\'"		'<0AAq	%& Mr*   variant
pretrainedc                 n    |j                  dd      }t        t        | |ft        t	        |d      d|S )Nout_indicesr   getter)r  feature_cls)pretrained_filter_fnfeature_cfg)popr   r   r}  r   )r~  r  ro  r  s       r(   _create_hierar    sF    **]A.K 2[hG  r*   c           	      L    t        ddd      }t        dd| it        |fi |S )Nr[  r   )r   r.   ra  r.   r   r   r   r  )hiera_tiny_224r   r  r  ro  
model_argss      r(   r  r    s.    aEJ_j_DD^W]D^__r*   c           	      L    t        ddd      }t        dd| it        |fi |S )Nr[  r   r   r.      r.   r  r  )hiera_small_224r  r  s      r(   r  r    s.    aFJ`z`T*E_X^E_``r*   c           	      L    t        ddd      }t        dd| it        |fi |S )Nr[  r   r^  r  r  )hiera_base_224r  r  s      r(   r  r    s.    aFJ_j_DD^W]D^__r*   c           	      L    t        ddd      }t        dd| it        |fi |S )Np   r.   r^  r  r  )hiera_base_plus_224r  r  s      r(   r  r    s.    qGJd:djIc\bIcddr*   c           	      L    t        ddd      }t        dd| it        |fi |S )N   r.   r.      $   r   r  r  )hiera_large_224r  r  s      r(   r  r    s.    qGJ`z`T*E_X^E_``r*   c           	      L    t        ddd      }t        dd| it        |fi |S )Nrs  r   r  r  r  )hiera_huge_224r  r  s      r(   r  r    s.    qGJ_j_DD^W]D^__r*   c           
      V    t        dddddddd	      }t        dd
| it        |fi |S )Nr[  r   r  T)r_  r_  h㈵>r   F)r   r   r   r   r   r   r   r   r  )hiera_small_abswin_256r  r  s      r(   r  r    s@    -4aieUJ gjgDQ[Lf_eLfggr*   c           	      R    t        dddddd      }t        d	d| it        |fi |S )
Nr[  r   r^  Tr  r   )r   r   r   r   r   r   r  )hiera_base_abswin_256r  r  s      r(   r  r    s;    -4]aotvJfZf4PZKe^dKeffr*   )rb  rP   rd  )Fre   rY   	functoolsr   typingr   r   r   r   r   r	   rg   torch.nnr#   torch.nn.functional
functionalr2   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   	_registryr   r   _builderr   	_featuresr   _features_fxr   _manipulater   r   __all__rf   r   r)   rh   r6   rI   rK   rl   ru   r   r   r   rp  default_cfgsr}  re  r   r  r  r  r  r  r  r  r  r   r*   r(   <module>r     s  0   ; ;     AI I I = * + 3 0 )=s =tBII = 	$s) 	5<< 	ELL 	 	<<Cy s) \\	B;RYY ;|NRYY Nb?		 ?DA AH% %PIBII IX % S&%t(S&
 $S& ')S& 4S&* &t(+S&2 $3S&> +D-?S&F t GS&R ')SS&Z 4[S&f &t(gS&n $oS&z 5d 47{S&B 8 4:CS&J -d 4/KS&T 0 42US&^ &t 4(_S& Sl2
3 
D 
u 
 ` `
 a a
 ` `
 e e
 a a
 ` `
 h h g gr*   