
    khI              	          d Z ddlmZmZmZmZmZmZmZ ddl	Z	ddl
mZ ddlmc mZ ddlmZmZ ddlmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZm Z  dgZ! G d dejD                        Z# G d dejH                        Z% G d dejH                        Z& G d dejD                        Z' G d dejD                        Z( G d dejD                        Z) G d dejD                        Z* G d dejD                        Z+ G d dejD                        Z,dee-e	j\                  f   dejD                  dee-e	j\                  f   fd Z/d0d!e-d"edee-ef   fd#Z0 e  e0d$%       e0d$%       e0d$%       e0d$d&'      d(      Z1d1d)e-d*e2d"ede,fd+Z3ed1d*e2d"ede,fd,       Z4ed1d*e2d"ede,fd-       Z5ed1d*e2d"ede,fd.       Z6ed1d*e2d"ede,fd/       Z7y)2a  SHViT
SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design
Code: https://github.com/ysj9909/SHViT
Paper: https://arxiv.org/abs/2401.16456

@inproceedings{yun2024shvit,
  author={Yun, Seokju and Ro, Youngmin},
  title={SHViT: Single-Head Vision Transformer with Memory Efficient Macro Design},
  booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  pages={5756--5767},
  year={2024}
}
    )AnyDictListOptionalSetTupleUnionNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)
GroupNorm1SqueezeExciteSelectAdaptivePool2d	LayerTypetrunc_normal_   )build_model_with_cfg)feature_take_indices)checkpoint_seq)register_modelgenerate_default_cfgsSHViTc                        e Zd Zdej                  f fdZdej                  dej                  fdZ ej                         dej                  fd       Z
 xZS )Residualmc                 0    t         |           || _        y N)super__init__r   )selfr   	__class__s     M/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/shvit.pyr   zResidual.__init__   s        xreturnc                 *    || j                  |      z   S r   )r   r    r$   s     r"   forwardzResidual.forward#   s    466!9}r#   c                    t        | j                  t              r| j                  j                         }|j                  |j
                  k(  sJ t        j                  |j                  j                  d   |j                  j                  d   dd      }t        j                  |g d      }|xj                  |j                  |j                  j                        z  c_        |S | S )Nr   r   )r   r   r   r   )
isinstancer   
Conv2dNormfusegroupsin_channelstorchonesweightshapeFpadtodevice)r    r   identitys      r"   r,   zResidual.fuse&   s    dffj)A88q}},-,zz!((.."3QXX^^A5F1MHuuXy1HHHAHHOO44HHKr#   )__name__
__module____qualname__nnModuler   r/   Tensorr(   no_gradr,   __classcell__r!   s   @r"   r   r      sU    "))  %,,  U]]_	bii 	 	r#   r   c                        e Zd Z	 	 	 	 d
dedededededef fdZ ej                         dej                  fd	       Z	 xZ
S )r+   r.   out_channelskernel_sizestridepaddingbn_weight_initc           
         t         |           | j                  dt        j                  |||||fddi|       | j                  dt        j
                  |             t        j                  j                  | j                  j                  |       t        j                  j                  | j                  j                  d       y )NcbiasFbnr   )r   r   
add_moduler;   Conv2dBatchNorm2dinit	constant_rJ   r1   rI   )	r    r.   rB   rC   rD   rE   rF   kwargsr!   s	           r"   r   zConv2dNorm.__init__4   s     	RYY{FG[JO[SY[ 	\bnn\:;
$''...9
$'',,*r#   r%   c                    | j                   j                         \  }}|j                  |j                  |j                  z   dz  z  }|j                  |d d d d d f   z  }|j
                  |j                  |j                  z  |j                  |j                  z   dz  z  z
  }t        j                  |j                  d      | j                  j                  z  |j                  d      |j                  dd  | j                  j                  | j                  j                  | j                  j                  | j                  j                  |j                  j                   |j                  j"                  	      }|j                  j$                  j'                  |       |j
                  j$                  j'                  |       |S )N      ?r   r      )	r.   rB   rC   rD   rE   dilationr-   r6   dtype)_modulesvaluesr1   running_varepsrI   running_meanr;   rL   sizerH   r-   r2   rD   rE   rT   r6   rU   datacopy_)r    rH   rJ   wbr   s         r"   r,   zConv2dNorm.fuseE   s:   $$&2II"&&0S88HHqD$,--GGboo		1R^^bff5LQT4TTTIIq	DFFMM166==FFNNVV__66==88??((..

 	
A	!r#   )r   r   r   r   )r8   r9   r:   intr   r/   r>   r;   rL   r,   r?   r@   s   @r"   r+   r+   3   sw    
  !"#++ + 	+
 + +  +" U]]_bii  r#   r+   c            	       ~     e Zd Z	 	 ddedededef fdZ ej                         de	j                  fd       Z xZS )	
NormLinearin_featuresout_featuresrI   stdc                 l   t         |           | j                  dt        j                  |             | j                  dt        j
                  |||             t        | j                  j                  |       |r5t        j                  j                  | j                  j                  d       y y )NrJ   l)rI   )re   r   )r   r   rK   r;   BatchNorm1dLinearr   rg   r1   rN   rO   rI   )r    rc   rd   rI   re   r!   s        r"   r   zNormLinear.__init__\   sx     	bnn[9:RYY{LtLMdffmm-GGdffkk1- r#   r%   c                    | j                   j                         \  }}|j                  |j                  |j                  z   dz  z  }|j
                  | j                  j                  | j                  j                  z  |j                  |j                  z   dz  z  z
  }|j                  |d d d f   z  }|j
                  $|| j                  j                  j                  z  }n<|j                  |d d d f   z  j                  d      | j                  j
                  z   }t        j                  |j                  d      |j                  d      |j                  j                  |j                  j                        }|j                  j                   j#                  |       |j
                  j                   j#                  |       |S )NrR   r   r   )r6   rU   )rV   rW   r1   rX   rY   rI   rJ   rZ   rg   Tviewr;   ri   r[   r6   rU   r\   r]   )r    rJ   rg   r^   r_   r   s         r"   r,   zNormLinear.fusej   s@   $$&AII"&&0S88GGdgg**TWW^^;r~~PRPVPV?V[^>^^^HHqqz!66>DFFMMOO#AAagJ&,,R0466;;>AIIaffQi188??!((..Y	A	!r#   )Tg{Gz?)r8   r9   r:   r`   boolfloatr   r/   r>   r;   ri   r,   r?   r@   s   @r"   rb   rb   [   s]    
 .. . 	.
 . U]]_bii  r#   rb   c                   |     e Zd Zej                  fdededef fdZdej                  dej                  fdZ
 xZS )PatchMergingdimout_dim	act_layerc                     t         |           t        |dz        }t        ||      | _         |       | _        t        ||ddd|      | _         |       | _        t        |d      | _	        t        ||      | _
        y )N      rS   r   r-   g      ?)r   r   r`   r+   conv1act1conv2act2r   seconv3)r    rr   rs   rt   hid_dimr!   s        r"   r   zPatchMerging.__init__{   sm    cAg,W-
K	!Q'J
K	.1
r#   r$   r%   c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }|S r   )ry   rz   r{   r|   r}   r~   r'   s     r"   r(   zPatchMerging.forward   sU    JJqMIIaLJJqMIIaLGGAJJJqMr#   r8   r9   r:   r;   ReLUr`   r   r   r/   r=   r(   r?   r@   s   @r"   rq   rq   z   s?    FHgg 2C 2# 2) 2 %,, r#   rq   c                   |     e Zd Zej                  fdededef fdZdej                  dej                  fdZ
 xZS )FFNrr   	embed_dimrt   c                     t         |           t        ||      | _         |       | _        t        ||d      | _        y )Nr   rF   )r   r   r+   pw1actpw2)r    rr   r   rt   r!   s       r"   r   zFFN.__init__   s5    c9-;iQ?r#   r$   r%   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r   r   r'   s     r"   r(   zFFN.forward   s.    HHQKHHQKHHQKr#   r   r@   s   @r"   r   r      sC    HJ @C @C @I @ %,, r#   r   c                        e Zd ZdZeej                  fdededededef
 fdZ	de
j                  d	e
j                  fd
Z xZS )SHSAzSingle-Head Self-Attentionrr   qk_dimpdim
norm_layerrt   c                     t         |           |dz  | _        || _        || _        || _         ||      | _        t        ||dz  |z         | _        t        j                   |       t        ||d            | _        y )Ng      rS   r   r   )r   r   scaler   rr   r   pre_normr+   qkvr;   
Sequentialproj)r    rr   r   r   r   rt   r!   s         r"   r   zSHSA.__init__   sq     	t^
	"4(dFQJ$56MM)+z#sST/UV	r#   r$   r%   c                    |j                   \  }}}}t        j                  || j                  | j                  | j                  z
  gd      \  }}| j                  |      }| j                  |      }t        j                  || j                  | j                  | j                  gd      \  }	}
}|	j                  d      |
j                  d      |j                  d      }}
}	|	j                  dd      |
z  | j                  z  }|j                  d      }||j                  dd      z  j                  || j                  ||      }| j                  t        j                  ||gd            }|S )Nr   )rr   rS   rk   )r2   r/   splitr   rr   r   r   r   flatten	transposer   softmaxreshaper   cat)r    r$   B_HWx1x2r   qkvattns                r"   r(   zSHSA.forward   s$   WW
1aQDHHtyy,@ AKB]]2hhrl++cDKKdii#HaP1a))A,		!aiila1B#a'4::5|||#$..R((11!TYY1EIIeiiR23r#   )r8   r9   r:   __doc__r   r;   r   r`   r   r   r/   r=   r(   r?   r@   s   @r"   r   r      sh    $ %/#%77WW W 	W
 "W !W& %,, r#   r   c                        e Zd Zeej
                  fdedededededef fdZ	de
j                  d	e
j                  fd
Z xZS )
BasicBlockrr   r   r   typer   rt   c                 &   t         |           t        t        ||ddd|d            | _        |dk(  rt        t        |||||            | _        nt        j                         | _        t        t        |t        |dz                    | _        y )Nrw   r   r   )r-   rF   srS   )r   r   r   r+   convr   mixerr;   Identityr   r`   ffn)r    rr   r   r   r   r   rt   r!   s          r"   r   zBasicBlock.__init__   sv     	ZS!Q#VWXY	3;!$sFD*i"PQDJDJCSq\23r#   r$   r%   c                 l    | j                  |      }| j                  |      }| j                  |      }|S r   )r   r   r   r'   s     r"   r(   zBasicBlock.forward   s.    IIaLJJqMHHQKr#   r8   r9   r:   r   r;   r   r`   strr   r   r/   r=   r(   r?   r@   s   @r"   r   r      si     %/#%7744 4 	4
 4 "4 !4" %,, r#   r   c                        e Zd Zeej
                  fdedededededededef fd	Z	d
e
j                  de
j                  fdZ xZS )
StageBlockprev_dimrr   r   r   r   depthr   rt   c	                    t         
|           d| _        ||k7  rt        j                  t        t        ||ddd|            t        t        |t        |dz        |            t        |||      t        t        ||ddd|            t        t        |t        |dz        |                  nt        j                         | _        t        j                  t        |      D 	cg c]  }	t        ||||||       c}	 | _        y c c}	w )NFrw   r   rx   rS   )r   r   grad_checkpointingr;   r   r   r+   r   r`   rq   r   
downsampleranger   blocks)r    r   rr   r   r   r   r   r   rt   r   r!   s             r"   r   zStageBlock.__init__   s     	"' _ --Z(Aq!HMNS3x!|#4i@A3	2ZS!Q#>?Sc#'lI67
 #%++- 	 mmPUV[P\&
KLJsFD$
IF&
  &
s   %Dr$   r%   c                     | j                  |      }| j                  r6t        j                  j	                         st        | j                  |      }|S | j                  |      }|S r   )r   r   r/   jitis_scriptingr   r   r'   s     r"   r(   zStageBlock.forward   sS    OOA""599+A+A+Ct{{A.A  AAr#   r   r@   s   @r"   r   r      s}     %/#%77  	
    " !2 %,, r#   r   c                       e Zd Zddddddddd	eej
                  fd
edededeeeef   deeeef   deeeef   deeeef   deeeef   de	de
de
f fdZej                  j                  defd       Zej                  j                  d,dedeeef   fd       Zej                  j                  d-d       Zej                  j                  dej,                  fd       Zd.dedefdZ	 	 	 	 	 d/dej2                  deeeee   f      ded ed!ed"edeeej2                     eej2                  eej2                     f   f   fd#Z	 	 	 d0deeee   f   d$ed%efd&Zdej2                  dej2                  fd'Zd,dej2                  d(edej2                  fd)Z dej2                  dej2                  fd*Z! ejD                         d+        Z# xZ$S )1r   rw     avg)      i  )    @   `   )   r   r   )r   rS   rw   )r   r   r           in_chansnum_classesglobal_poolr   partial_dimr   r   types	drop_rater   rt   c                    t         |           || _        |	| _        g | _        |d   }t        j                  t        ||dz  ddd       |       t        |dz  |dz  ddd       |       t        |dz  |dz  ddd       |       t        |dz  |ddd            | _        g }|}t        t        |            D ]h  }|j                  t        |||   ||   ||   ||   ||   |
|             ||   }| j                  j                  t        |d|dz   z  d| 	             j t        j                  | | _        |d
   x| _        | _        t#        |      | _        |rt        j&                  d      nt        j(                         | _        |dkD  rt-        | j                   |      | _        y t        j(                         | _        y )Nr      rw   rS   r   rv   )r   rr   r   r   r   r   r   rt   zstages.)num_chs	reductionmodulerk   	pool_type)r   r   r   r   feature_infor;   r   r+   patch_embedr   lenappendr   dictstagesnum_featureshead_hidden_sizer   r   Flattenr   r   rb   head)r    r   r   r   r   r   r   r   r   r   r   rt   stem_chsr   prev_chsir!   s                   r"   r   zSHViT.__init__   s    	&" Q<==xQ1a8Kx1}h!mQ1=Kx1}h!mQ1=Kx1}h1a8
 s9~& 	gAMM*!aLay ^1XAh%#	 	 !|H$$T(a!A#hY`ab`cWd%ef	g mmV, 5>bMAD1/+F(3rzz!}FQTUoJt44kB	[][f[f[h	r#   r%   c                     t               S r   )setr    s    r"   no_weight_decayzSHViT.no_weight_decay5  s	    ur#   coarsec                 ,    t        d|rdnddg      }|S )Nz^patch_embedz^stages\.(\d+))z^stages\.(\d+).downsample)r   )z^stages\.(\d+)\.blocks\.(\d+)N)stemr   )r   )r    r   matchers      r"   group_matcherzSHViT.group_matcher9  s'     (.$485
 r#   c                 4    | j                   D ]	  }||_         y r   )r   r   )r    enabler   s      r"   set_grad_checkpointingzSHViT.set_grad_checkpointingD  s     	*A#)A 	*r#   c                 .    | j                   j                  S r   )r   rg   r   s    r"   get_classifierzSHViT.get_classifierI  s    yy{{r#   c                    || _         t        |      | _        |rt        j                  d      nt        j
                         | _        |dkD  rt        | j                  |      | _	        y t        j
                         | _	        y )Nr   r   r   )
r   r   r   r;   r   r   r   rb   r   r   )r    r   r   s      r"   reset_classifierzSHViT.reset_classifierM  s[    &/+F(3rzz!}FQTUoJt44kB	[][f[f[h	r#   r$   indicesnorm
stop_early
output_fmtintermediates_onlyc                 r   |dv sJ d       g }t        t        | j                        |      \  }}	| j                  |      }t        j
                  j                         s|s| j                  }
n| j                  d|	dz    }
t        |
      D ]#  \  }} ||      }||v s|j                  |       % |r|S ||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )NCHWzOutput shape must be NCHW.Nr   )	r   r   r   r   r/   r   r   	enumerater   )r    r$   r   r   r   r   r   intermediatestake_indices	max_indexr   feat_idxstages                r"   forward_intermediateszSHViT.forward_intermediatesT  s    * Y&D(DD&"6s4;;7G"Qi Q99!!#:[[F[[)a-0F(0 	(OHeaA<'$$Q'	(
   -r#   
prune_norm
prune_headc                     t        t        | j                        |      \  }}| j                  d|dz    | _        |r| j                  dd       |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r   r   r   )r    r   r  r  r   r   s         r"   prune_intermediate_layerszSHViT.prune_intermediate_layers~  sM     #7s4;;7G"Qikk.9q=1!!!R(r#   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r'   s     r"   forward_featureszSHViT.forward_features  s$    QKKNr#   
pre_logitsc                     | j                  |      }| j                  |      }| j                  dkD  r,t        j                  || j                  | j
                        }|r|S | j                  |      S )Nr   )ptraining)r   r   r   r3   dropoutr  r   )r    r$   r  s      r"   forward_headzSHViT.forward_head  sZ    QLLO>>B		!t~~FAq0DIIaL0r#   c                 J    | j                  |      }| j                  |      }|S r   )r
  r  r'   s     r"   r(   zSHViT.forward  s'    !!!$a r#   c                      fd |        y )Nc                     | j                         D ]?  \  }}t        |d      r&|j                         }t        | ||        |       8 |       A y )Nr,   )named_childrenhasattrr,   setattr)net
child_namechildfusedfuse_childrens       r"   r  z!SHViT.fuse.<locals>.fuse_children  sQ    %(%7%7%9 )!
E5&)!JJLECU3!%(!%()r#    )r    r  s    @r"   r,   z
SHViT.fuse  s    	) 	dr#   F)T)r   )NFFr   F)r   FT)%r8   r9   r:   r   r;   r   r`   r   r   ro   r   r   r/   r   ignorer   r   rn   r   r   r   r   r<   r   r   r=   r   r	   r   r  r  r
  r  r(   r>   r,   r?   r@   s   @r"   r   r      s    #$.=0<+7*3*9!$.#%775i5i 5i 	5i
 S#s]+5i sC}-5i #sC-(5i c3'5i c3'5i 5i "5i !5in YY   YYD T#s(^   YY* * YY		  iC ic i 8<$$',( ||(  eCcN34(  	( 
 (  (  !%(  
tELL!5tELL7I)I#JJ	K( X ./$#	3S	>*  	%,, 5<< 
1ell 1 1 1 %,, 
 U]]_
 
r#   
state_dictmodelr%   c                 *    | j                  d|       } | S )Nr   )get)r  r   s     r"   checkpoint_filter_fnr#    s    4J: r#   urlrP   c                 6    | dddddt         t        dddd	d
d|S )Nr   )rw      r&  )rv   rv   g      ?bicubiczpatch_embed.0.czhead.lzarXiv:2401.16456zHSHViT: Single-Head Vision Transformer with Memory Efficient Macro Designz https://github.com/ysj9909/SHViT)r$  r   
input_size	pool_sizecrop_pctinterpolationmeanre   
first_conv
classifier	paper_ids
paper_name
origin_urlr
   )r$  rP   s     r"   _cfgr2    s9    4}SYI%.B'x'`8	 	 	r#   ztimm/)	hf_hub_id)rw   r   r   )r3  r(  )zshvit_s1.in1kzshvit_s2.in1kzshvit_s3.in1kzshvit_s4.in1kvariant
pretrainedc                 N    t        t        | |ft        t        dd      d|}|S )N)r   r   rS   T)out_indicesflatten_sequential)pretrained_filter_fnfeature_cfg)r   r   r#  r   )r4  r5  rP   r   s       r"   _create_shvitr;    s6     w
1Y4H 	E Lr#   c           	      N    t        dddd      }t        dd| it        |fi |S )N)r   r&  i@  rS   rv      )r   0   D   r   r   r   r   r   r   r   r5  )shvit_s1r   r;  r5  rP   
model_argss      r"   rC  rC    5    !TceJY
Yd:>XQW>XYYr#   c           	      N    t        dddd      }t        dd| it        |fi |S )N)r   i4    r=  )r   B   r   rA  rB  r5  )shvit_s2rD  rE  s      r"   rK  rK    rG  r#   c           	      N    t        dddd      }t        dd| it        |fi |S )N)   i`  rI  )rw   r>  r>  )r?  K   r   rA  rB  r5  )shvit_s3rD  rE  s      r"   rO  rO    rG  r#   c           	      N    t        dddd      }t        dd| it        |fi |S )N)r&  iP  rI  )rv         )r?  H   r   rA  rB  r5  )shvit_s4rD  rE  s      r"   rT  rT    rG  r#   )r  r  )8r   typingr   r   r   r   r   r   r	   r/   torch.nnr;   torch.nn.functional
functionalr3   	timm.datar   r   timm.layersr   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   __all__r<   r   r   r+   rb   rq   r   r   r   r   r   r   r=   r#  r2  default_cfgsrn   r;  rC  rK  rO  rT  r  r#   r"   <module>ra     sU   @ ? ?     A a a * + ' <)ryy *% %P >299 *")) !299 !H 2   FkBII k\T#u||*;%< RYY SWX[]b]i]iXiSj B
c 
# 
$sCx. 
 %    & *3 D C E  Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Z Zr#   