
    khWJ                        d Z ddlZddlmZ ddlmZmZ ddlmZm	Z	m
Z
mZmZmZmZ ddlmZ ddlmZ ddlmZmZ d	gZ G d
 dej.                        Z G d dej.                        Z G d dej.                        Z G d d	ej.                        ZddZddZ e ed       ed      d      Zeddefd       Zeddefd       Z y)z Visformer

Paper: Visformer: The Vision-friendly Transformer - https://arxiv.org/abs/2104.12533

From original at https://github.com/danczs/Visformer

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    NIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)	to_2tupletrunc_normal_DropPath
PatchEmbedLayerNorm2dcreate_classifieruse_fused_attn   )build_model_with_cfg)checkpoint_seq)register_modelgenerate_default_cfgs	Visformerc                   F     e Zd Zddej                  dddf fd	Zd Z xZS )
SpatialMlpN           Fc           	         t         	|           |xs |}|xs |}t        |      }|| _        || _        || _        | j
                  r|dk  r	|dz  dz  }n|dz  }|| _        || _        t        j                  ||dddd      | _
         |       | _        t        j                  |d         | _        | j
                  r8t        j                  ||ddd| j                  d	      | _         |       | _        nd | _        d | _        t        j                  ||dddd      | _        t        j                  |d         | _        y )
N         r   r   Fstridepaddingbias   )r   r   groupsr   )super__init__r   in_featuresout_featuresspatial_convhidden_featuresgroupnnConv2dconv1act1Dropoutdrop1conv2act2conv3drop3)
selfr#   r&   r$   	act_layerdropr'   r%   
drop_probs	__class__s
            Q/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/visformer.pyr"   zSpatialMlp.__init__   s,    	#2{)8[t_
&((qy"-/Q"6"-/.
YY{OQqRSZ_`
K	ZZ
1.
!AqQUQ[Q[bgiDJ!DIDJDIYYaST[`a
ZZ
1.
    c                    | j                  |      }| j                  |      }| j                  |      }| j                  "| j                  |      }| j	                  |      }| j                  |      }| j                  |      }|S N)r*   r+   r-   r.   r/   r0   r1   r2   xs     r7   forwardzSpatialMlp.forward=   sl    JJqMIIaLJJqM::!

1A		!AJJqMJJqMr8   )__name__
__module____qualname__r(   GELUr"   r=   __classcell__r6   s   @r7   r   r      s&     !gg$/L	r8   r   c                   `     e Zd ZU ej                  j
                  e   ed<   d fd	Zd Z	 xZ
S )	Attention
fused_attnc                    t         |           || _        || _        t	        ||z  |z        }|| _        |dz  | _        t        d      | _        t        j                  |||z  dz  dddd      | _        t        j                  |      | _        t        j                  | j
                  | j                  z  |dddd      | _        t        j                  |      | _        y )	Ng      T)experimentalr   r   r   Fr   )r!   r"   dim	num_headsroundhead_dimscaler   rF   r(   r)   qkvr,   	attn_dropproj	proj_drop)r2   rI   rJ   head_dim_ratiorO   rQ   rL   r6   s          r7   r"   zAttention.__init__L   s    "	)N:; %
(d;99S(Y"6":AaQRY^_I.IIdmmdnn<c1QXY`ef	I.r8   c                    |j                   \  }}}}| j                  |      j                  |d| j                  | j                  d      j                  ddddd      }|j                  d      \  }}}| j                  rzt        j                  j                  j                  |j                         |j                         |j                         | j                  r| j                  j                  nd      }nJ||j!                  d	d      z  | j"                  z  }	|	j%                  d
      }	| j                  |	      }	|	|z  }|j                  dddd      j                  |d||      }| j'                  |      }| j)                  |      }|S )Nr   r   r   r      r   )	dropout_p)rI   )shaperN   reshaperJ   rL   permuteunbindrF   torchr(   
functionalscaled_dot_product_attention
contiguoustrainingrO   p	transposerM   softmaxrP   rQ   )
r2   r<   BCHWqkvattns
             r7   r=   zAttention.forwardZ   sG   WW
1aHHQK1dnndmmRHPPQRTUWXZ[]^_((1+1a??##@@.2mm$..** A A
 B++tzz9D<<B<'D>>$'DqAIIaAq!))!RA6IIaLNN1r8   )r         ?r   r   )r>   r?   r@   r\   jitFinalbool__annotations__r"   r=   rB   rC   s   @r7   rE   rE   I   s     		%%/r8   rE   c            
       N     e Zd Zdddddej                  edddf
 fd	Zd Z xZS )Blockrl         @r   r   Fc                 F   t         |           || _        |dkD  rt        |      nt	        j
                         | _        |rd | _        d | _        n" |	|      | _        t        |||||      | _         |	|      | _
        t        |t        ||z        |||
|      | _        y )Nr   )rJ   rR   rO   rQ   )r#   r&   r3   r4   r'   r%   )r!   r"   r%   r   r(   Identity	drop_pathnorm1rk   rE   norm2r   intmlp)r2   rI   rJ   rR   	mlp_ratiorQ   rO   rv   r3   
norm_layerr'   attn_disabledr%   r6   s                r7   r"   zBlock.__init__q   s     	(09B),BKKMDJDI#CDJ!#-##DI  _
i0%
r8   c                     | j                   2|| j                  | j                  | j                  |                  z   }|| j                  | j                  | j	                  |                  z   }|S r:   )rk   rv   rw   rz   rx   r;   s     r7   r=   zBlock.forward   sY    99 DNN499TZZ]#;<<Atxx

1677r8   )	r>   r?   r@   r(   rA   r
   r"   r=   rB   rC   s   @r7   rr   rr   p   s2    
 gg"'
Rr8   rr   c                   F    e Zd Zddddddddd	d
d
d
d
d
eddddddddf fd	Zd Zej                  j                  dd       Z	ej                  j                  dd       Z
ej                  j                  dej                  fd       Zd dedefdZd ZddefdZd Z xZS )!r         r              r   rs   r   111TFr   avgNc                    t         |           t        |      }|| _        || _        || _        || _        || _        || _        t        |t        t        f      r!|\  | _        | _        | _        t        |      }n2|dz  x| _        | _        || j                  z
  | j                  z
  | _        || _        d| _        t%        j&                  d||      D cg c]  }|j)                          }}| j                  r1d | _        t-        |||||d      | _        |D cg c]  }||z  	 }}n| j
                  9d | _        t-        ||dz  ||dz  |d      | _        |D cg c]
  }||dz  z   }}nt1        j2                  t1        j4                  || j
                  dddd      t1        j6                  | j
                        t1        j8                  d	            | _        |D cg c]  }|dz  	 }}t-        ||d
z  | j
                  |dz  |d      | _        |D cg c]
  }||d
z  z   }}| j                   r| j                  r0t1        j:                  t%        j<                  d|g|       | _        n2t1        j:                  t%        j<                  d|dz  g|       | _        t1        j@                  |      | _!        nd | _        t1        j2                  tE        | j                        D cg c]*  }tG        |dz  |d|	||||   |||d   dk(  |d   dk(        , c} | _$        | j                  sut-        ||dz  |dz  ||d      | _%        |D cg c]
  }||dz  z   }}| j                   r0t1        j:                  t%        j<                  d|g|       | _&        nd | _&        nd | _%        t1        j2                  tE        | j                  | j                  | j                  z         D cg c]'  }tG        ||d|	||||   |||d   dk(  |d   dk(        ) c} | _'        | j                  sxt-        ||dz  ||dz  |d      | _(        |D cg c]
  }||dz  z   }}| j                   r3t1        j:                  t%        j<                  d|dz  g|       | _)        nd | _)        nd | _(        t1        j2                  tE        | j                  | j                  z   |      D cg c]*  }tG        |dz  |d|	||||   |||d   dk(  |d   dk(        , c} | _*        | j                  r|n|dz  x| _+        | _,         || jV                        | _-        t]        | jV                  | j                  |      \  }}|| _/        t1        j@                  |
      | _0        || _1        | j                   rQte        | j>                  d       | j                  s.te        | jL                  d       te        | jR                  d       | jg                  | jh                         y c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w c c}w )Nr   Fr   )img_size
patch_sizein_chans	embed_dimr|   flattenr      r   T)inplacerU   r   )ra   g      ?01)rI   rJ   rR   r{   rQ   rO   rv   r|   r'   r}   r%   r   rl   	pool_type{Gz?std)5r!   r"   r   num_classesr   init_channelsr   vit_stem	conv_init
isinstancelisttuple
stage_num1
stage_num2
stage_num3sumuse_pos_embedgrad_checkpointingr\   linspaceitemstemr	   patch_embed1r(   
Sequentialr)   BatchNorm2dReLU	Parameterzeros
pos_embed1r,   pos_droprangerr   stage1patch_embed2
pos_embed2stage2patch_embed3
pos_embed3stage3num_featureshead_hidden_sizenormr   global_pool	head_dropheadr   apply_init_weights)r2   r   r   r   r   r   r   depthrJ   r{   	drop_ratepos_drop_rateproj_drop_rateattn_drop_ratedrop_path_rater|   
attn_stager   r%   r   r'   r   r   
embed_normr<   dprir   r6   s                               r7   r"   zVisformer.__init__   s   4 	X&&"*  "edE]+@E=DOT_doJE05
:DOdo#doo5GDO*"'!&>5!IJAqvvxJJ==DI *!%!#%!D 2::AZ:H:!!) 	$.%)Q%'1n)!%! =EEqA*/2EEMMIIh(:(:AaQRY^_NN4#5#56GGD)	
 -55qAF55$.%)Q!//'1n)!%! =EEqA*/2EE}}"$,,u{{1i/S(/S"T"$,,u{{1il/VX/V"WJJ7DM"DOmm 4??+&
  qL#"#((a&%)!}3*1o4&
 $ }} *!%?"a#%!D 9AA1jAo.AHA!!"$,,u{{1i/S(/S"T"& $Dmm 4??DOODOO,KL&
  #"#((a&%)!}3*1o4&
 $ }} *!%?"#a-%!D 9AA1jAo.AHA!!"$,,u{{1ik/UH/U"V"& $Dmm 4??4??:EB&
  M#"#((a&%)!}3*1o4&
 " BFIT]`aTaaD1t001	 .d.?.?AQAQ]hiT&I.	 $//t4==doo48doo48

4%%&o K ; F 6 F&
6 B&
6 B&
s<   XXX $X%X*0/X/X4.,X9X>"/Yc                    t        |t        j                        rOt        |j                  d       |j
                  +t        j                  j                  |j
                  d       y y t        |t        j                        r| j                  r-t        j                  j                  |j                  dd       nt        |j                  d       |j
                  +t        j                  j                  |j
                  d       y y y )Nr   r   r   fan_outrelu)modenonlinearityr   )r   r(   Linearr   weightr   init	constant_r)   r   kaiming_normal_)r2   ms     r7   r   zVisformer._init_weightsf  s    a#!((-vv!!!!&&!, "299%~~''yv'VahhD1vv!!!!&&"- " &r8   c                 .    t        d|rdndd fddg      S )Nz^patch_embed1|pos_embed1|stemz^stage(\d+)\.(\d+))z^(?:patch_embed|pos_embed)(\d+))r   )z^norm)i )r   blocks)dict)r2   coarses     r7   group_matcherzVisformer.group_matchers  s+    1*0&6KTR:$
 	
r8   c                     || _         y r:   )r   )r2   enables     r7   set_grad_checkpointingz Visformer.set_grad_checkpointing~  s
    "(r8   returnc                     | j                   S r:   )r   )r2   s    r7   get_classifierzVisformer.get_classifier  s    yyr8   r   r   c                 p    || _         t        | j                  | j                   |      \  | _        | _        y )Nr   )r   r   r   r   r   )r2   r   r   s      r7   reset_classifierzVisformer.reset_classifier  s/    &&78I8I4K[K[gr&s#$)r8   c                    | j                   | j                  |      }| j                  |      }| j                  | j                  || j                  z         }| j                  r5t
        j                  j                         st        | j                  |      }n| j                  |      }| j                  ;| j                  |      }| j                  | j                  || j                  z         }| j                  r5t
        j                  j                         st        | j                  |      }n| j                  |      }| j                  ;| j                  |      }| j                  | j                  || j                  z         }| j                  r5t
        j                  j                         st        | j                  |      }n| j                  |      }| j!                  |      }|S r:   )r   r   r   r   r   r\   rm   is_scriptingr   r   r   r   r   r   r   r   r   r;   s     r7   forward_featureszVisformer.forward_features  s{   99 		!A a ??&a$//12A""599+A+A+Ct{{A.AAA (!!!$A*MM!doo"56""599+A+A+Ct{{A.AAA (!!!$A*MM!doo"56""599+A+A+Ct{{A.AAAIIaLr8   
pre_logitsc                 p    | j                  |      }| j                  |      }|r|S | j                  |      S r:   )r   r   r   )r2   r<   r   s      r7   forward_headzVisformer.forward_head  s5    QNN1q0DIIaL0r8   c                 J    | j                  |      }| j                  |      }|S r:   )r   r   r;   s     r7   r=   zVisformer.forward  s'    !!!$a r8   F)T)r   )r>   r?   r@   r
   r"   r   r\   rm   ignorer   r   r(   Moduler   ry   strr   r   ro   r   r=   rB   rC   s   @r7   r   r      s     "1B'H. YY
 
 YY) ) YY		  tC tc t"H1$ 1
r8   c                 d    |j                  dd       rt        d      t        t        | |fi |}|S )Nfeatures_onlyz<features_only not implemented for Vision Transformer models.)getRuntimeErrorr   r   )variant
pretraineddefault_cfgkwargsmodels        r7   _create_visformerr     s4    zz/4(YZZ GZJ6JELr8   c                 2    | ddddddt         t        ddd	|S )
Nr   )r   r   r   )r   r   g?bicubicTzstem.0r   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizemeanr   
first_conv
classifierr   )r   r   s     r7   _cfgr    s2    =v%.Bf  r8   ztimm/)	hf_hub_id)zvisformer_tiny.in1kzvisformer_small.in1kr   c                     t        ddddddddt        j                  d	t        j                  
      }t        dd| it        |fi |}|S )Nr      r   rU   rU   r   rs   r   011100Tr   r   r   rJ   r{   r'   r   r%   r|   r   r   r   )visformer_tinyr   r(   r   r   r   r   	model_cfgr   s       r7   r	  r	    sT    CyAQS[\uSW>>#I c:ciIb[aIbcELr8   c                     t        ddddddddt        j                  d	t        j                  
      }t        dd| it        |fi |}|S )Nr   r   r  r   rs   r   r  r  Tr  r   )visformer_smallr
  r  s       r7   r  r    sT    CyAQS[\uSW>>#I dJd$yJc\bJcdELr8   )FN) r   )!__doc__r\   torch.nnr(   	timm.datar   r   timm.layersr   r   r   r	   r
   r   r   _builderr   _manipulater   	_registryr   r   __all__r   r   rE   rr   r   r   r  default_cfgsr	  r   r8   r7   <module>r     s      A v v v * ' <-0 0f$		 $N.BII .bU		 Up %'2 73&  )   9  r8   