
    khU              
          d Z ddlZddlmZ ddlmZmZmZmZ ddl	Z	ddl
mZ ddlmc mZ ddlmZmZ ddlmZmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZm Z  ddl!m"Z" dgZ#ee$e$f   Z%e G d dejL                               Z' G d dejL                        Z( G d dejL                        Z) G d dejL                        Z* G d dejL                        Z+ G d dejL                        Z,d%dZ-d&dZ. e  e.d       e.d       e.d       e.d       e.d       e.d      d      Z/ed%de,fd       Z0ed%de,fd        Z1ed%de,fd!       Z2ed%de,fd"       Z3ed%de,fd#       Z4ed%de,fd$       Z5y)'z Twins
A PyTorch impl of : `Twins: Revisiting the Design of Spatial Attention in Vision Transformers`
    - https://arxiv.org/pdf/2104.13840.pdf

Code/weights from https://github.com/Meituan-AutoML/Twins, original copyright/license info below

    N)partial)ListOptionalTupleUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpDropPath	to_2tupletrunc_normal_use_fused_attn   )build_model_with_cfg)feature_take_indices)register_notrace_module)register_modelgenerate_default_cfgs)	AttentionTwinsc                   j     e Zd ZU dZej
                  j                  e   ed<   d fd	Z	de
fdZ xZS )LocallyGroupedAttnz( LSA: self attention within a group
    
fused_attnc                    |dk7  sJ t         t        |           ||z  dk(  sJ d| d| d       || _        || _        ||z  }|dz  | _        t               | _        t        j                  ||dz  d	      | _
        t        j                  |      | _        t        j                  ||      | _        t        j                  |      | _        || _        y )
Nr   r   dim   should be divided by num_heads .         Tbias)superr   __init__dim	num_headsscaler   r   nnLinearqkvDropout	attn_dropproj	proj_dropws)selfr%   r&   r,   r.   r/   head_dim	__class__s          M/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/twins.pyr$   zLocallyGroupedAttn.__init__)   s    Qww $02Y!#]tC50PQZP[[\%]]#")#%
(*99S#'5I.IIc3'	I.    sizec           	         |j                   \  }}}|\  }}|j                  ||||      }dx}}	| j                  || j                  z  z
  | j                  z  }
| j                  || j                  z  z
  | j                  z  }t        j                  |dd||
|	|f      }|j                   \  }}}}|| j                  z  || j                  z  }}|j                  ||| j                  || j                  |      j                  dd      }| j                  |      j                  |||z  | j                  | j                  z  d| j                  || j                  z        j                  dddddd      }|j                  d      \  }}}| j                  r<t        j                  |||| j                  r| j                  j                  nd      }nL|| j                   z  }||j                  d	d
      z  }|j#                  d
      }| j                  |      }||z  }|j                  dd      j                  |||| j                  | j                  |      }|j                  dd      j                  ||| j                  z  || j                  z  |      }|
dkD  s|dkD  r|d d d |d |d d f   j%                         }|j                  |||      }| j'                  |      }| j)                  |      }|S )Nr      r    r                 	dropout_pr%   )shapeviewr/   Fpadreshape	transposer*   r&   permuteunbindr   scaled_dot_product_attentiontrainingr,   pr'   softmax
contiguousr-   r.   )r0   xr5   BNCHWpad_lpad_tpad_rpad_b_HpWp_h_wr*   qkvattns                         r3   forwardzLocallyGroupedAttn.forward:   s    ''1a1FF1aA1tww;&$''11tww;&$''1EE!aE5%78ww2r1twwdggBIIaTWWb$''15??1Ehhqk!!rBw$'')1dnna4>>>QSSZSZ[\^_abdeghjkSl 	**Q-1a??..1a.2mm$..**A
 DJJAq{{2r**D<<B<'D>>$'DqAKK1%%aR$''1EKK1%%adggrDGG|QG19	!RaR!Q,**,AIIaAIIaLNN1r4      r:   r:   r   __name__
__module____qualname____doc__torchjitFinalbool__annotations__r$   Size_r`   __classcell__r2   s   @r3   r   r   #   s,    		%%"%u %r4   r   c                   j     e Zd ZU dZej
                  j                  e   ed<   d fd	Z	de
fdZ xZS )GlobalSubSampleAttnzQ GSA: using a  key to summarize the information for a group to be efficient.
    r   c                 d   t         |           ||z  dk(  sJ d| d| d       || _        || _        ||z  }|dz  | _        t               | _        t        j                  ||d      | _	        t        j                  ||dz  d      | _
        t        j                  |      | _        t        j                  ||      | _        t        j                  |      | _        || _        |d	kD  r9t        j                   ||||
      | _        t        j$                  |      | _        y d | _        d | _        y )Nr   r   r   r   r   Tr!   r7   r   kernel_sizestride)r#   r$   r%   r&   r'   r   r   r(   r)   r\   kvr+   r,   r-   r.   sr_ratioConv2dsr	LayerNormnorm)r0   r%   r&   r,   r.   rw   r1   r2   s          r3   r$   zGlobalSubSampleAttn.__init__   s   Y!#]tC50PQZP[[\%]]#")#%
(*3$/))Cqt4I.IIc3'	I. a<iiShxPDGS)DIDGDIr4   r5   c                 F   |j                   \  }}}| j                  |      j                  ||| j                  || j                  z        j	                  dddd      }| j
                  i |j	                  ddd      j                  ||g| }| j                  |      j                  ||d      j	                  ddd      }| j                  |      }| j                  |      j                  |dd| j                  || j                  z        j	                  ddddd      }|j                  d      \  }}	| j                  rPt        j                  j                  j                  |||	| j                  r| j                  j                   nd      }nL|| j"                  z  }||j%                  d	d      z  }
|
j'                  d
      }
| j                  |
      }
|
|	z  }|j%                  dd      j                  |||      }| j)                  |      }| j+                  |      }|S )Nr   r7   r   r    r>   r8   r:   r;   r=   r?   )r@   r\   rD   r&   rF   ry   r{   rv   rG   r   rh   r(   
functionalrH   rI   r,   rJ   r'   rE   rK   r-   r.   )r0   rM   r5   rN   rO   rP   r\   rv   r]   r^   r_   s              r3   r`   zGlobalSubSampleAttn.forward   s   ''1aFF1IaDNNA4GHPPQRTUWXZ[\77*		!Q"**1a7$7A
""1a,44Q1=A		!AWWQZ2q$..!t~~:MNVVWXZ[]^`acdeyy|1??##@@1a.2mm$..** A A
 DJJAq{{2r**D<<B<'D>>$'DqAKK1%%aA.IIaLNN1r4   ra   rc   ro   s   @r3   rq   rq      s,    		%%0u r4   rq   c                   d     e Zd Zddddej                  ej
                  ddf fd	ZdefdZ xZ	S )Blockg      @r:   r   Nc                    t         |            ||      | _        |
t        ||dd ||      | _        n.|
dk(  rt        |||||	      | _        nt        |||||
      | _        |dkD  rt        |      nt        j                         | _
         ||      | _        t        |t        ||z        ||      | _        |dkD  rt        |      | _        y t        j                         | _        y )NFr   r:   )in_featureshidden_features	act_layerdrop)r#   r$   norm1r   r_   rq   r   r   r(   Identity
drop_path1norm2r   intmlp
drop_path2)r0   r%   r&   	mlp_ratior.   r,   	drop_pathr   
norm_layerrw   r/   r2   s              r3   r$   zBlock.__init__   s     	_
:!#y%y)TDI1W+CIyRZ[DI*3	9iQSTDI1:R(9-R[[]_
i0	
 2;R(9-R[[]r4   r5   c                     || j                  | j                  | j                  |      |            z   }|| j                  | j	                  | j                  |                  z   }|S N)r   r_   r   r   r   r   )r0   rM   r5   s      r3   r`   zBlock.forward   sQ    		$**Q- >??A 788r4   )
rd   re   rf   r(   GELUrz   r$   rm   r`   rn   ro   s   @r3   r   r      s8     gg||S@u r4   r   c                   2     e Zd Zd fd	ZdefdZd Z xZS )PosConvc                     t         t        |           t        j                  t        j
                  ||d|dd|            | _        || _        y )Nr    r   T)r"   groups)r#   r   r$   r(   
Sequentialrx   r-   ru   )r0   in_chans	embed_dimru   r2   s       r3   r$   zPosConv.__init__   sA    gt%'MMIIh	1fad9U
	 r4   r5   c                     |j                   \  }}} |j                  dd      j                  ||g| }| j                  |      }| j                  dk(  r||z  }|j                  d      j                  dd      }|S )Nr   r7   )r@   rE   rA   r-   ru   flatten)r0   rM   r5   rN   rO   rP   cnn_feat_tokens          r3   r`   zPosConv.forward   sx    ''1a/Q*//1<t<IIn%;;!AIIaL""1a(r4   c                 D    t        d      D cg c]  }d|z  	 c}S c c}w )Nr8   zproj.%d.weight)range)r0   is     r3   no_weight_decayzPosConv.no_weight_decay   s     .3Ah7 1$777s   )   r   )rd   re   rf   r$   rm   r`   r   rn   ro   s   @r3   r   r      s    u 8r4   r   c                   N     e Zd ZdZd fd	Zdeej                  ef   fdZ	 xZ
S )
PatchEmbedz Image to Patch Embedding
    c                    t         |           t        |      }t        |      }|| _        || _        |d   |d   z  dk(  r|d   |d   z  dk(  sJ d| d| d       |d   |d   z  |d   |d   z  c| _        | _        | j
                  | j                  z  | _        t        j                  ||||      | _
        t        j                  |      | _        y )Nr   r   z	img_size z! should be divided by patch_size r   rs   )r#   r$   r   img_size
patch_sizerQ   rR   num_patchesr(   rx   r-   rz   r{   )r0   r   r   r   r   r2   s        r3   r$   zPatchEmbed.__init__  s    X&z*
 ${Z]*a/HQK*Q-4OST4T 	Qz!B:,aP	QT!!
15x{jQRm7S66DFF?IIh	zR\]	LL+	r4   returnc                     |j                   \  }}}}| j                  |      j                  d      j                  dd      }| j	                  |      }|| j
                  d   z  || j
                  d   z  f}||fS )Nr7   r   r   )r@   r-   r   rE   r{   r   )r0   rM   rN   rP   rQ   rR   out_sizes          r3   r`   zPatchEmbed.forward  su    WW
1aIIaL  #--a3IIaL++Q$//!2D-DE({r4   )      r    r   )rd   re   rf   rg   r$   r   rh   Tensorrm   r`   rn   ro   s   @r3   r   r      s'    ,E%,,"56 r4   r   c                       e Zd ZdZdddddddd	d
ddddddd eej                  d      ef fd	Ze	j                  j                  d        Ze	j                  j                  d(d       Ze	j                  j                  d)d       Ze	j                  j                  dej                  fd       Zd*dedee   fdZd Z	 	 	 	 	 d+de	j,                  deeeee   f      dededededeee	j,                     ee	j,                  ee	j,                     f   f   fd Z	 	 	 d,deeee   f   d!ed"efd#Zd$ Zd(d%efd&Zd' Z xZ S )-r   z Twins Vision Transformer (Revisiting Spatial Attention)

    Adapted from PVT (PyramidVisionTransformer) class at https://github.com/whai362/PVT.git
    r   r8   r      avg@            )r   r7   r8   rb   r8   r8   r8   r8   r    r8      r    rb   r8   r7   r   Nr:   gư>)epsc                    t         |           || _        || _        |	| _        || _        |d   x| _        | _        d| _        t        |      }|}t        j                         | _        t        j                         | _        t        t        |	            D ]v  }| j                  j!                  t#        ||||                | j                  j!                  t        j$                  |             ||   }t'        fd|D              }dx t        j                         | _        g | _        t-        j.                  d|t1        |	            D cg c]  }|j3                          }}d}t        t        |	            D ]  }t        j                  t        |	|         D cg c]4  } |||   ||   ||   |||||z      ||
|   ||dz  dk(  rdn||   	      6 c}      }| j(                  j!                  |       | xj*                  t5        d	| ||   dd|z   z  
      gz  c_        ||	|   z  } t        j                  |D cg c]  }t7        ||       c}      | _         || j                        | _        t        j$                  |      | _        |dkD  r t        j>                  | j                  |      nt        j@                         | _!        | jE                  | jF                         y c c}w c c}w c c}w )Nr>   F)rJ   c              3   (   K   | ]	  }|z    y wr    ).0tr   s     r3   	<genexpr>z!Twins.__init__.<locals>.<genexpr>D  s     ?Q*_?s   r7   r   r   )	r%   r&   r   r.   r,   r   r   rw   r/   zblock.)modulenum_chs	reduction)$r#   r$   num_classesglobal_pooldepths
embed_dimsnum_featureshead_hidden_sizegrad_checkpointingr   r(   
ModuleListpatch_embeds	pos_dropsr   lenappendr   r+   tupleblocksfeature_inforh   linspacesumitemdictr   	pos_blockr{   	head_dropr)   r   headapply_init_weights)r0   r   r   r   r   r   r   r&   
mlp_ratiosr   	sr_ratioswss	drop_ratepos_drop_rateproj_drop_rateattn_drop_ratedrop_path_rater   	block_clsprev_chsr   rM   dprcurr]   _blockr   r2   s     `                        r3   r$   zTwins.__init__  s   * 	&&$4>rNBD1"'X&MMOs6{# 	A$$Z*hPZ[\P]%^_NN!!"**}"=>!!}H?h??HJ	 mmo!&>3v;!OPAqvvxPPs6{# 	A]] INfUViHX	$Z DE %.qM#A,$Q-((cAg,%"1q1uz1s1v	%? 	$Z 
F KKv&$qclJqM]^abcdad]e"f!gg6!9C	  Wa'b)	9(E'bct001	 I.ALqBIId//=VXVaVaVc	 	

4%%&7 Q	$Z (cs   K4$9K9

K>c                     t        | j                  j                         D cg c]
  \  }}d|z    c}}      S c c}}w )Nz
pos_block.)setr   named_parameters)r0   nrJ   s      r3   r   zTwins.no_weight_decayf  s1    1P1P1RSAL1$STTSs   :
c                 0    t        d|rddgng d      }|S )Nz^patch_embeds.0)z)^(?:blocks|patch_embeds|pos_block)\.(\d+)Nz^norm)i ))z^blocks\.(\d+)\.(\d+)N)z"^(?:patch_embeds|pos_block)\.(\d+))r   r   )stemr   )r   )r0   coarsematchers      r3   group_matcherzTwins.group_matcherj  s.    #  E#

 r4   c                     |rJ d       y )Nz$gradient checkpointing not supportedr   )r0   enables     r3   set_grad_checkpointingzTwins.set_grad_checkpointingy  s    AAAz6r4   r   c                     | j                   S r   )r   )r0   s    r3   get_classifierzTwins.get_classifier}  s    yyr4   r   r   c                     || _         ||dv sJ || _        |dkD  r&t        j                  | j                  |      | _        y t        j
                         | _        y )N) r   r   )r   r   r(   r)   r   r   r   )r0   r   r   s      r3   reset_classifierzTwins.reset_classifier  sT    &"+---*DALqBIId//=	VXVaVaVc	r4   c                 p   t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       y y y t        |t        j                        rUt        j                  j                  |j
                  d       t        j                  j                  |j                  d       y t        |t        j                        r|j                  d   |j                  d   z  |j                  z  }||j                  z  }|j                  j                  j                  dt        j                   d|z               |j
                  %|j
                  j                  j#                          y y y )Ng{Gz?)stdr   g      ?r   g       @)
isinstancer(   r)   r   weightr"   init	constant_rz   rx   rt   out_channelsr   datanormal_mathsqrtzero_)r0   mfan_outs      r3   r   zTwins._init_weights  s!   a#!((,!RYY'AFF,>!!!&&!, -?'2<<(GGaffa(GGahh,299%mmA&q)99ANNJG GHHMM!!!TYYsW}%=>vv!!!# "	 &r4   rM   indicesr{   
stop_early
output_fmtintermediates_onlyc           	      X   |dk(  sJ d       g }t        t        | j                        |      \  }}	|j                  \  }
}}}t	        t        | j                  | j                  | j                  | j                              D ]  \  }\  }}}} ||      \  }} ||      }t	        |      D ]  \  }} |||      }|dk(  s |||      } |t        | j                        dz
  k  rL |j                  |
g|d j                  dddd      j                         }||v s|j                  |       ||v s|r| j                  |      n|}|j                   |j                  |
g|d j                  dddd      j                                 |r|S | j                  |      }||fS )a   Forward features that returns intermediates.
        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        NCHWz$Output shape for Twins must be NCHW.r   r   r>   r    r7   )r   r   r   r@   	enumeratezipr   r   r   r   rD   rF   rL   r   r{   )r0   rM   r  r{   r	  r
  r  intermediatestake_indices	max_indexrN   rW   heightwidthr   embedr   r   pos_blkr5   jblkx_feats                          r3   forward_intermediateszTwins.forward_intermediates  s   ( V#K%KK#"6s4;;7G"Qi
  gg1fe1:3!!4>>4;;<P 2
 	h-A-tVW AhGAtQA#F+ )34L64(A)
 3t{{#a''AIIa+$++33Aq!Q?JJL$!((+$-1TYYq\qF!(()ED)E")E)M)MaQRTUWX)Y)d)d)fg%	h(   IIaL-r4   
prune_norm
prune_headc                     t        t        | j                        |      \  }}|rt        j                         | _        |r| j                  dd       |S )z@ Prune layers not required for specified intermediates.
        r   r   )r   r   r   r(   r   r{   r   )r0   r  r  r  r  r  s         r3   prune_intermediate_layerszTwins.prune_intermediate_layers  sG     #7s4;;7G"QiDI!!!R(r4   c                    |j                   d   }t        t        | j                  | j                  | j
                  | j                              D ]  \  }\  }}}} ||      \  }} ||      }t        |      D ]  \  }	}
 |
||      }|	dk(  s |||      } |t        | j                        dz
  k  se |j                  |g|d j                  dddd      j                         } | j                  |      }|S )Nr   r   r>   r    r7   )r@   r  r  r   r   r   r   r   r   rD   rF   rL   r{   )r0   rM   rN   r   r  r   r   r  r5   r  r  s              r3   forward_featureszTwins.forward_features  s    GGAJ1:D%%t~~t{{DNNS2U 		M-A-tVWAhGAtQA#F+ )34L64(A) 3t{{#a''AIIa+$++33Aq!Q?JJL		M IIaLr4   
pre_logitsc                     | j                   dk(  r|j                  d      }| j                  |      }|r|S | j                  |      S )Nr   r   r?   )r   meanr   r   )r0   rM   r!  s      r3   forward_headzTwins.forward_head  sB    u$1ANN1q0DIIaL0r4   c                 J    | j                  |      }| j                  |      }|S r   )r   r$  )r0   rM   s     r3   r`   zTwins.forward  s'    !!!$a r4   F)Tr   )NFFr  F)r   FT)!rd   re   rf   rg   r   r(   rz   r   r$   rh   ri   ignorer   r   r   Moduler   r   r   strr   r   r   r   r   rk   r   r  r  r   r$  r`   rn   ro   s   @r3   r   r     s    *"#"r||6'E'N YYU U YY  YYB B YY		  dC dhsm d$$ 8<$$',5 ||5  eCcN345  	5 
 5  5  !%5  
tELL!5tELL7I)I#JJ	K5 r ./$#	3S	>*  	 1$ 1r4   c                 h    |j                  dd      }t        t        | |fdt        |d      i|}|S )Nout_indicesr8   feature_cfggetter)r+  feature_cls)popr   r   r   )variant
pretrainedkwargsr+  models        r3   _create_twinsr4    sC    **]A.K w
[hG E
 Lr4   c                 2    | ddd dddt         t        ddd|S )	Nr   )r    r   r   g?bicubicTzpatch_embeds.0.projr   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer#  r   
first_conv
classifierr   )r7  r2  s     r3   _cfgr?    s2    =t%.B+6  r4   ztimm/)	hf_hub_id)ztwins_pcpvt_small.in1kztwins_pcpvt_base.in1kztwins_pcpvt_large.in1kztwins_svt_small.in1kztwins_svt_base.in1kztwins_svt_large.in1kr   c           	      f    t        dg dg dg dg dg d      }t        d	d| it        |fi |S )
Nr8   r   r   i@  r   r   r7   r9   rb   rb   rb   r8   r8   r   r   r   r   r&   r   r   r   r1  )twins_pcpvt_smallr   r4  r1  r2  
model_argss      r3   rF  rF    s<    !4Ye|5J bbtJGaZ`Gabbr4   c           	      f    t        dg dg dg dg dg d      }t        d	d| it        |fi |S )
Nr8   rB  rC  rD  )r    r8      r    r   rE  r1  )twins_pcpvt_baserG  rH  s      r3   rL  rL     s<    !4Ye6J a
ad:F`Y_F`aar4   c           	      f    t        dg dg dg dg dg d      }t        d	d| it        |fi |S )
Nr8   rB  rC  rD  )r    rb      r    r   rE  r1  )twins_pcpvt_largerG  rH  s      r3   rO  rO  (  s<    !4Ye6J bbtJGaZ`Gabbr4   c           
      l    t        dg dg dg dg dg dg d      }t        d
d	| it        |fi |S )Nr8   r   )r7   r8   rb   r   r   )r7   r7   
   r8      rS  rS  rS  r   r   r   r&   r   r   r   r   r1  )twins_svt_smallrG  rH  s      r3   rU  rU  0  s?    !4Zf,,HJ `z`T*E_X^E_``r4   c           
      l    t        dg dg dg dg dg dg d      }t        d
d	| it        |fi |S )Nr8   )`      i  r   )r    r         r   r7   r7   rK  r7   rR  r   rT  r1  )twins_svt_baserG  rH  s      r3   r\  r\  8  s?    !4[g,,HJ _j_DD^W]D^__r4   c           
      l    t        dg dg dg dg dg dg d      }t        d
d	| it        |fi |S )Nr8   )r   r   r   i   )r8   rb   r       r   r[  rR  r   rT  r1  )twins_svt_largerG  rH  s      r3   r_  r_  @  s?    !6.]i,,HJ `z`T*E_X^E_``r4   r&  )r   )6rg   r  	functoolsr   typingr   r   r   r   rh   torch.nnr(   torch.nn.functionalr}   rB   	timm.datar	   r
   timm.layersr   r   r   r   r   _builderr   	_featuresr   _features_fxr   	_registryr   r   vision_transformerr   __all__r   rm   r(  r   rq   r   r   r   r   r4  r?  default_cfgsrF  rL  rO  rU  r\  r_  r   r4   r3   <module>rm     s     / /     A O O * + 1 < ))c3h ; ; ;B8")) 8v%BII %P8bii 8, 8\BII \~ %"W5!G4"W5 73'2 73&  cU c c bE b b cU c c a5 a a `% ` ` a5 a ar4   