
    kh;                        d Z 	 ddlmZ ddlZddlmZ ddlmZmZ ddl	m
Z
mZmZmZmZmZ ddlmZ ddlmZ dd	lmZmZ d
gZe G d dej0                               Z G d dej0                        Z G d dej0                        Z G d d
ej0                        ZddZddZ e ed       ed       ed      d      Zeddefd       Z eddefd       Z!eddefd       Z"y)a   ConViT Model

@article{d2021convit,
  title={ConViT: Improving Vision Transformers with Soft Convolutional Inductive Biases},
  author={d'Ascoli, St{'e}phane and Touvron, Hugo and Leavitt, Matthew and Morcos, Ari and Biroli, Giulio and Sagun, Levent},
  journal={arXiv preprint arXiv:2103.10697},
  year={2021}
}

Paper link: https://arxiv.org/abs/2103.10697
Original code: https://github.com/facebookresearch/convit, original copyright below

Modifications and additions for timm hacked together by / Copyright 2021, Ross Wightman
    )OptionalNIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathtrunc_normal_
PatchEmbedMlp	LayerNormHybridEmbed   )build_model_with_cfg)register_notrace_module)register_modelgenerate_default_cfgsConVitc                   h     e Zd Z	 	 	 	 	 d	 fd	Zd Zd Zd
dZd Zdede	j                  fdZ xZS )GPSAc                 `   t         |           || _        || _        ||z  }|dz  | _        || _        t        j                  ||dz  |      | _        t        j                  |||      | _	        t        j                  |      | _        t        j                  ||      | _        t        j                  d|      | _        t        j                  |      | _        t        j                  t!        j"                  | j                              | _        t!        j&                  dddd      | _        y )N         bias   r   )super__init__	num_headsdimscalelocality_strengthnnLinearqkvDropout	attn_dropprojpos_proj	proj_drop	Parametertorchonesgating_paramzerosrel_indices)	selfr   r   qkv_biasr&   r)   r    head_dim	__class__s	           N/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/convit.pyr   zGPSA.__init__)   s     	")#%
!2))Cqx83(3I.IIc3'			!Y/I.LLDNN)CD).Q1a)@    c                    |j                   \  }}}| j                  | j                  j                   d   |k7  r| j                  |      | _        | j                  |      }| j	                  |      j                  ||| j                  || j                  z        j                  dddd      }||z  j                  dd      j                  |||      }| j                  |      }| j                  |      }|S )Nr   r   r   r   )shaper/   get_rel_indicesget_attentionr$   reshaper   permute	transposer'   r)   )r0   xBNCattnr$   s          r4   forwardzGPSA.forwardC   s    ''1a#t'7'7'='=a'@A'E#33A6D!!!$FF1IaDNNA4GHPPQRTUWXZ[\AX  A&..q!Q7IIaLNN1r5   c                    |j                   \  }}}| j                  |      j                  ||d| j                  || j                  z        j	                  ddddd      }|d   |d   }}| j
                  j                  |ddd      }| j                  |      j	                  dddd      }||j                  dd      z  | j                  z  }	|	j                  d      }	|j                  d      }| j                  j                  dddd      }
d	t        j                  |
      z
  |	z  t        j                  |
      |z  z   }||j                  d      j!                  d      z  }| j#                  |      }|S )
Nr   r   r   r      r         ?)r7   r#   r:   r   r;   r/   expandr(   r<   r   softmaxr-   viewr+   sigmoidsum	unsqueezer&   )r0   r=   r>   r?   r@   r#   qk	pos_scorepatch_scoregatingrA   s               r4   r9   zGPSA.get_attentionN   sa   ''1aWWQZ1adnn9LMUUVWYZ\]_`bcd!ube1$$++Ar2r:	MM),44Q1a@	1;;r2..$**<!))b)1%%"%-	""''2q!4U]]6**k9EMM&<QT]<]]R **2..~~d#r5   c                     | j                  |      j                  d      }| j                  j                         d d d d df   dz  }t	        j
                  d||f      |j                  d      z  }|r||fS |S )Nr   rE         ?	nm,hnm->h)r9   meanr/   squeezer+   einsumsize)r0   r=   
return_mapattn_map	distancesdists         r4   get_attention_mapzGPSA.get_attention_map^   sw    %%a(--a0$$,,.q!Rx8B>	||K)X)>?)..QRBSS>!Kr5   c                    | j                   j                  j                  j                  t	        j
                  | j                               d}t        | j                  dz        }|dz  dk(  r|dz
  dz  n|dz  }t        |      D ]  }t        |      D ]  }|||z  z   }d| j                  j                  j                  |df<   d||z
  z  |z  | j                  j                  j                  |df<   d||z
  z  |z  | j                  j                  j                  |df<     | j                  j                  xj                  | j                  z  c_        y )Nr   rU   r   r   rE   )r$   weightdatacopy_r+   eyer   intr   ranger(   r    )r0   locality_distancekernel_sizecenterh1h2positions          r4   
local_initzGPSA.local_initg   s;     488!45$..B./*5/Q*>+/Q&KSTDT$ 	_BK( _b 009;$$))(A+69:b6k9JM^9^$$))(A+69:b6k9JM^9^$$))(A+6	_	_ 	!!T%;%;;!r5   num_patchesreturnc                 |   t        |dz        }t        j                  d||d      }t        j                  |      j	                  dd      t        j                  |      j	                  dd      z
  }|j                  ||      }|j                  |d      j                  |d      }|dz  |dz  z   }|j                  d      |d d d d d d df<   |j                  d      |d d d d d d df<   |j                  d      |d d d d d d df<   | j                  j                  j                  }|j                  |      S )NrU   r   r   rE   r   rG   r   )re   r+   r.   arangerK   repeatrepeat_interleaverN   r#   ra   deviceto)	r0   rn   img_sizer/   indindxindyinddrt   s	            r4   r8   zGPSA.get_rel_indicesu   s   {b()kk![+qAll8$))!R05<<3I3N3NrST3UUzz(H-$$X1$5GGVWGXqy419$"&.."3Aq!QJ"&.."3Aq!QJ"&.."3Aq!QJ&&~~f%%r5   )   F        r|   rH   F)__name__
__module____qualname__r   rB   r9   r_   rm   re   r+   Tensorr8   __classcell__r3   s   @r4   r   r   '   sE    
  A4	 <&3 &5<< &r5   r   c                   6     e Zd Z	 	 	 	 d fd	ZddZd Z xZS )MHSAc                 ,   t         |           || _        ||z  }|dz  | _        t	        j
                  ||dz  |      | _        t	        j                  |      | _        t	        j
                  ||      | _	        t	        j                  |      | _
        y )Nr   r   r   )r   r   r   r   r!   r"   qkvr%   r&   r'   r)   )r0   r   r   r1   r&   r)   r2   r3   s          r4   r   zMHSA.__init__   sw     	")#%
99S#'9I.IIc3'	I.r5   c                    |j                   \  }}}| j                  |      j                  ||d| j                  || j                  z        j	                  ddddd      }|d   |d   |d   }	}}||j                  dd      z  | j                  z  }
|
j                  d      j                  d      }
t        |d	z        }t        j                  |      j                  dd      t        j                  |      j                  dd      z
  }|j                  ||      }|j                  |d      j                  |d      }|dz  |dz  z   }|d	z  }|j                  |j                         }t        j"                  d
||
f      |z  }|r||
fS |S )Nr   r   r   r   rD   rF   rE   rG   rU   rV   )r7   r   r:   r   r;   r<   r   rJ   rW   re   r+   rq   rK   rr   rs   ru   rt   rY   )r0   r=   r[   r>   r?   r@   r   rO   rP   r$   r\   rv   rw   rx   ry   rz   r]   r^   s                     r4   r_   zMHSA.get_attention_map   s~   ''1ahhqk!!!Q4>>1;NOWWXY[\^_abdefa&#a&#a&a1B++tzz9###+003qBw<ll8$))!R05<<3I3N3NrST3UUzz(H-$$X1$5GGVWGXqy419$BJ	LL*	||K)X)>?!C>!Kr5   c                    |j                   \  }}}| j                  |      j                  ||d| j                  || j                  z        j	                  ddddd      }|j                  d      \  }}}||j                  dd      z  | j                  z  }	|	j                  d      }	| j                  |	      }	|	|z  j                  dd      j                  |||      }| j                  |      }| j                  |      }|S )	Nr   r   r   r   rD   rF   rE   rG   )r7   r   r:   r   r;   unbindr<   r   rJ   r&   r'   r)   )
r0   r=   r>   r?   r@   r   rO   rP   r$   rA   s
             r4   rB   zMHSA.forward   s    ''1ahhqk!!!Q4>>1;NOWWXY[\^_abdef**Q-1aAKKB''4::5|||#~~d#AX  A&..q!Q7IIaLNN1r5   )r{   Fr|   r|   r}   )r~   r   r   r   r_   rB   r   r   s   @r4   r   r      s      /$*r5   r   c            	       L     e Zd Zdddddej                  eddf	 fd	Zd Z xZS )Block      @Fr|   TrH   c                 j   t         |            |	|      | _        |
| _        | j                  rt	        ||||||      | _        nt        |||||      | _        |dkD  rt        |      nt        j                         | _
         |	|      | _        t        ||z        }t        ||||      | _        y )N)r   r1   r&   r)   r    )r   r1   r&   r)   r|   )in_featureshidden_features	act_layerdrop)r   r   norm1use_gpsar   rA   r   r   r!   Identity	drop_pathnorm2re   r
   mlp)r0   r   r   	mlp_ratior1   r)   r&   r   r   
norm_layerr   r    mlp_hidden_dimr3   s                r4   r   zBlock.__init__   s     	_
 ==#!##"3DI #!##DI 1:B),BKKM_
S9_-*	
r5   c                     || j                  | j                  | j                  |                  z   }|| j                  | j                  | j	                  |                  z   }|S N)r   rA   r   r   r   r0   r=   s     r4   rB   zBlock.forward   sO    tyyA788txx

1677r5   )	r~   r   r   r!   GELUr   r   rB   r   r   s   @r4   r   r      s/     gg  *
Xr5   r   c                       e Zd ZdZddddddddd	d
ddddddedddf fd	Zd Zej                  j                  d        Z
ej                  j                  dd       Zej                  j                  dd       Zej                  j                  dej                  fd       Zddedee   fdZd ZddefdZd Z xZS ) r   zI Vision Transformer with support for patch or hybrid CNN input stage
          r     tokeni      r   Fr|   NrH   Tc                 (   t         |           |dv sJ ||z  }|| _        || _        || _        |x| _        x| _        | _        || _        || _	        |t        ||||      | _        nt        ||||      | _        | j                  j                  }|| _        t        j                  t!        j"                  dd|            | _        t        j&                  |      | _        | j                  rFt        j                  t!        j"                  d||            | _        t-        | j*                  d       t!        j.                  d||      D cg c]  }|j1                          }}t        j2                  t5        |      D cg c]  }t7        |||	|
||||   |||k  |	
       c}      | _         ||      | _        t=        |dd
      g| _        t        j&                  |      | _         |dkD  rt        jB                  ||      nt        jD                         | _#        t-        | j$                  d       | jI                  | jJ                         | jM                         D ]"  \  }}tO        |d      s|jQ                          $ y c c}w c c}w )N) avgr   )rv   in_chans	embed_dim)rv   
patch_sizer   r   r   )p{Gz?stdr   )
r   r   r   r1   r)   r&   r   r   r   r    head)num_chs	reductionmodulerm   ))r   r   num_classesglobal_poollocal_up_to_layernum_featureshead_hidden_sizer   r    use_pos_embedr   patch_embedr	   rn   r!   r*   r+   r.   	cls_tokenr%   pos_drop	pos_embedr   linspaceitem
ModuleListrf   r   blocksnormdictfeature_info	head_dropr"   r   r   apply_init_weightsnamed_moduleshasattrrm   )r0   rv   r   r   r   r   r   depthr   r   r1   	drop_ratepos_drop_rateproj_drop_rateattn_drop_ratedrop_path_ratehybrid_backboner   r   r    r   rn   r=   dprinmr3   s                              r4   r   zConVit.__init__   sL   . 	2222Y	&&!2ENNND1DN!2*&*(XQZ \D  *!%!#	 D &&22&ekk!Q	&BC

]3\\%++ai*PQDN$..c2!&>5!IJAqvvxJJmm U|%%  ##!((a&%.."3%% & y)	 ")qPQI.9DqBIIi5bkkm	dnn#.

4%%&&&( 	DAqq,'	/ K%%s   J
"Jc                    t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       y y y t        |t        j                        rUt        j                  j                  |j
                  d       t        j                  j                  |j                  d       y y )Nr   r   r   rH   )	
isinstancer!   r"   r   ra   r   init	constant_r   )r0   r   s     r4   r   zConVit._init_weightsB  s    a#!((,!RYY'AFF,>!!!&&!, -?'2<<(GGaffa(GGahh, )r5   c                 
    ddhS )Nr   r    r0   s    r4   no_weight_decayzConVit.no_weight_decayK  s    [))r5   c                      t        dddg      S )Nz ^cls_token|pos_embed|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )r0   coarses     r4   group_matcherzConVit.group_matcherO  s    4-/CD
 	
r5   c                     |rJ d       y )Nz$gradient checkpointing not supportedr   )r0   enables     r4   set_grad_checkpointingzConVit.set_grad_checkpointingV  s    AAAz6r5   ro   c                     | j                   S r   )r   r   s    r4   get_classifierzConVit.get_classifierZ  s    yyr5   r   r   c                     || _         ||dv sJ || _        |dkD  r&t        j                  | j                  |      | _        y t        j
                         | _        y )N)r   r   r   r   )r   r   r!   r"   r   r   r   )r0   r   r   s      r4   reset_classifierzConVit.reset_classifier^  sS    &""6666*D>IAoBIIdnnk:	SUS^S^S`	r5   c                    | j                  |      }| j                  r|| j                  z   }| j                  |      }| j                  j                  |j                  d   dd      }t        | j                        D ]5  \  }}|| j                  k(  rt        j                  ||fd      } ||      }7 | j                  |      }|S )Nr   rE   r   rG   )r   r   r   r   r   rI   r7   	enumerater   r   r+   catr   )r0   r=   
cls_tokensublks        r4   forward_featureszConVit.forward_featurese  s    QDNN"AMM!^^**1771:r2>
, 	FAsD***IIz1o15AA	 IIaLr5   
pre_logitsc                     | j                   r3| j                   dk(  r|d d dd f   j                  d      n|d d df   }| j                  |      }|r|S | j                  |      S )Nr   r   rG   r   )r   rW   r   r   )r0   r=   r   s      r4   forward_headzConVit.forward_headr  s_    (,(8(8E(A!QR%!$qAwANN1q0DIIaL0r5   c                 J    | j                  |      }| j                  |      }|S r   )r   r   r   s     r4   rB   zConVit.forwardx  s'    !!!$a r5   r}   )Tr   )r~   r   r   __doc__r   r   r   r+   jitignorer   r   r   r!   Moduler   re   r   strr   r   boolr   rB   r   r   s   @r4   r   r      s   
    +N`- YY* * YY
 
 YYB B YY		  aC ahsm a1$ 1r5   c                 `    |j                  dd       rt        d      t        t        | |fi |S )Nfeatures_onlyz<features_only not implemented for Vision Transformer models.)getRuntimeErrorr   r   )variant
pretrainedkwargss      r4   _create_convitr   ~  s0    zz/4(YZZFvFFr5   c           
      .    | ddd t         t        dddd	|S )Nr   )r   r   r   Tzpatch_embed.projr   )	urlr   
input_size	pool_sizerW   r   fixed_input_size
first_conv
classifierr   )r  r   s     r4   _cfgr    s.    =t%.BX\(	
  r5   ztimm/)	hf_hub_id)zconvit_tiny.fb_in1kzconvit_small.fb_in1kzconvit_base.fb_in1kro   c           	      T    t        dddd      }t        dd| dt        |fi |}|S )	N
   rH   0   rD   r   r    r   r   convit_tinyr   r   r   r   r   r   r   
model_argsmodels       r4   r  r    s;    rQPJf=Zf4PZKe^dKefELr5   c           	      T    t        dddd      }t        dd| dt        |fi |}|S )	Nr  rH   r  	   r  convit_smallr  r   r  r  s       r4   r  r    s;    rQPJg>jgDQ[Lf_eLfgELr5   c           	      T    t        dddd      }t        dd| dt        |fi |}|S )	Nr  rH   r  r   r  convit_baser  r   r  r  s       r4   r  r    s;    rRQJf=Zf4PZKe^dKefELr5   r}   )r   )#r   typingr   r+   torch.nnr!   	timm.datar   r   timm.layersr   r   r	   r
   r   r   _builderr   _features_fxr   	_registryr   r   __all__r   r   r   r   r   r   r  default_cfgsr  r  r  r   r5   r4   <module>r"     s  (    A X X * 1 < * X&299 X& X&v4299 4n1BII 1hMRYY M`G %'2 73'2	&  v      v  r5   