
    kh2S                        d Z ddlZddlmZmZmZmZ ddlZddlm	Z	 ddl
mZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ dgZ G d de	j<                        Z G d de	j<                        Z  G d de	j<                        Z! G d de	j<                        Z"ddZ# e e#d       e#d       e#d      d      Z$d Z%ddZ&edde"fd       Z'edde"fd       Z(edde"fd       Z)y)a   Transformer in Transformer (TNT) in PyTorch

A PyTorch implement of TNT as described in
'Transformer in Transformer' - https://arxiv.org/abs/2103.00112

The official mindspore code is released and available at
https://gitee.com/mindspore/mindspore/tree/master/model_zoo/research/cv/TNT

The official pytorch code is released and available at
https://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/tnt_pytorch
    N)ListOptionalTupleUnionIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STD)MlpDropPathtrunc_normal__assert	to_2tupleresample_abs_pos_embed   )build_model_with_cfg)feature_take_indices)
checkpoint)generate_default_cfgsregister_modelTNTc                   *     e Zd ZdZd fd	Zd Z xZS )	Attentionz Multi-Head Attention
    c                    t         |           || _        || _        ||z  }|| _        |dz  | _        t        j                  ||dz  |      | _        t        j                  |||      | _	        t        j                  |d      | _        t        j                  ||      | _        t        j                  |d      | _        y )Ng         biasT)inplace)super__init__
hidden_dim	num_headshead_dimscalennLinearqkvDropout	attn_dropproj	proj_drop)	selfdimr    r!   qkv_biasr)   r+   r"   	__class__s	           K/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/tnt.pyr   zAttention.__init__    s    $"* %
))Cah?3(3It<IIc3'	It<    c                 |   |j                   \  }}}| j                  |      j                  ||d| j                  | j                        j                  ddddd      }|j                  d      \  }}| j                  |      j                  ||| j                  d      j                  dddd      }||j                  dd      z  | j                  z  }	|	j                  d      }	| j                  |	      }	|	|z  j                  dd      j                  ||d      }| j                  |      }| j                  |      }|S )	Nr   r      r      r-   )shaper&   reshaper!   r"   permuteunbindr'   	transposer#   softmaxr)   r*   r+   )
r,   xBNCr&   qkr'   attns
             r0   forwardzAttention.forward.   s   ''1aWWQZ1aGOOPQSTVWYZ\]^yy|1FF1IaDNNB7??1aKAKKB''4::5|||#~~d#AX  A&..q!R8IIaLNN1r1   )   F        rG   )__name__
__module____qualname____doc__r   rE   __classcell__r/   s   @r0   r   r      s    =r1   r   c            
       f     e Zd ZdZdddddddej
                  ej                  df
 fd	Zd Z xZ	S )	Blockz TNT Block
    r4            @FrG   c                    t         |            ||      | _        t        |||||	|      | _         ||      | _        t        |t        |dz        |||      | _        || _	        | j                  r5 ||      | _
        t        j                  ||z  |d      | _        d | _        n= |||z        | _
        t        j                  ||z  |d      | _         ||      | _         ||      | _        t        |||||	|      | _        |
dkD  rt#        |
      nt        j$                         | _         ||      | _        t        |t        ||z        |||      | _        y )N)r!   r.   r)   r+   r4   )in_featureshidden_featuresout_features	act_layerdropTr   FrG   )r   r   norm_inr   attn_innorm_mlp_inr
   intmlp_inlegacy
norm1_projr$   r%   r*   
norm2_projnorm_outattn_outr   Identity	drop_pathnorm_mlpmlp)r,   r-   dim_out	num_pixelnum_heads_innum_heads_out	mlp_ratior.   r+   r)   rc   rV   
norm_layerr]   r/   s                 r0   r   zBlock.__init__B   sQ     	!# "
 &c?aL
 ;;(oDO		#	/7FDI"DO(y9DO		#	/7GDI(1DO #7+!#
 1:B),BKKM"7+) 34 
r1   c                 z   || j                  | j                  | j                  |                  z   }|| j                  | j                  | j	                  |                  z   }|j                         \  }}}| j                  at        j                  |d d ddf   |d d dd f   | j                  | j                  |      j                  ||dz
  d            z   gd      }not        j                  |d d ddf   |d d dd f   | j                  | j                  | j                  |j                  ||dz
  d                        z   gd      }|| j                  | j                  | j                  |                  z   }|| j                  | j                  | j                  |                  z   }||fS )Nr   r   r5   r7   )rc   rY   rX   r\   rZ   sizer_   torchcatr*   r^   r9   ra   r`   re   rd   )r,   pixel_embedpatch_embedr?   r@   rA   s         r0   rE   zBlock.forward   s   !DNN4<<[@Y3Z$[[!DNN4;;t?O?OP[?\3]$^^""$1a??"))AqsF#AqrE"TYYt{/K/S/STUWX[\W\^`/a%bb% K
  ))AqsF#AqrE"T__TYYt{ObObcdfgjkfkmoOp?q5r%ss% K "DNN4=={A[3\$]]!DNN488DMM+<V3W$XXK''r1   )
rH   rI   rJ   rK   r$   GELU	LayerNormr   rE   rL   rM   s   @r0   rO   rO   >   s<     gg||A
F(r1   rO   c                        e Zd ZdZ	 	 	 	 	 	 d
 fd	Zddeeeef   ef   fdZdeeef   deeef   fdZ	de
j                  de
j                  de
j                  fd	Z xZS )
PixelEmbedz Image to Pixel Embedding
    c                 F   t         
|           t        |      }t        |      }|d   |d   z  |d   |d   z  f| _        | j                  d   | j                  d   z  }|| _        || _        || _        || _        || _        |D cg c]  }t        j                  ||z         }	}|	| _        t        j                  || j                  dd|      | _        | j                  rt        j                  |	|	      | _        y t        j                  ||      | _        y c c}w )Nr   r      r3   )kernel_sizepaddingstride)rx   rz   )r   r   r   	grid_sizeimg_size
patch_sizer]   num_patchesin_dimmathceilnew_patch_sizer$   Conv2dr*   Unfoldunfold)r,   r|   r}   in_chansr   rz   r]   r~   psr   r/   s             r0   r   zPixelEmbed.__init__   s    	X&z*
"1+A6zRS}8TU~~a(T^^A->? $&;EFR$))BK0FF,IIhAV\]	;;))~VDK))
:NDK Gs   Dreturnc                 H    |rt        | j                        S | j                  S N)maxr}   )r,   	as_scalars     r0   
feat_ratiozPixelEmbed.feat_ratio   s    t''??"r1   r|   c                 V    |d   | j                   d   z  |d   | j                   d   z  fS )Nr   r   )r}   )r,   r|   s     r0   dynamic_feat_sizezPixelEmbed.dynamic_feat_size   s2    {dooa00(1+QRAS2SSSr1   r>   	pixel_posc                    |j                   \  }}}}t        || j                  d   k(  d| d| d| j                  d    d| j                  d    d	       t        || j                  d   k(  d| d| d| j                  d    d| j                  d    d	       | j                  rx| j	                  |      }| j                  |      }|j                  dd      j                  || j                  z  | j                  | j                  d   | j                  d         }nm| j                  |      }|j                  dd      j                  || j                  z  || j                  d   | j                  d         }| j	                  |      }||z   }|j                  || j                  z  | j                  d      j                  dd      }|S )	Nr   zInput image size (*z) doesn't match model (r   z).r   r5   )r8   r   r|   r]   r*   r   r<   r9   r~   r   r   r}   )r,   r>   r   r?   rA   HWs          r0   rE   zPixelEmbed.forward   s   WW
1aq!! 1QC'>t}}Q?O>PPQRVR_R_`aRbQccef	h 	q!! 1QC'>t}}Q?O>PPQRVR_R_`aRbQccef	h ;;		!AAAAq!))D$$$dkk43F3Fq3I4K^K^_`KacA AAAq!))!d.>.>*>4??STCUW[WfWfghWijA		!A	MIIa$***DKK<FFq!Lr1   )      r3   0   r4   FT)rH   rI   rJ   rK   r   r   r   r[   r   r   rn   TensorrE   rL   rM   s   @r0   ru   ru      s    
 O:#E%S/32F,G #T%S/ TeCHo T %,, 5<< r1   ru   c                   ~    e Zd ZdZdddddddd	d
d	dddddddej
                  d
df fd	Zd Zej                  j                  d        Zej                  j                  d'd       Zej                  j                  d(d       Zej                  j                  dej                  fd       Zd)dedee   fdZ	 	 	 	 	 	 d*dej(                  deeeee   f      dedededededeeej(                     eej(                  eej(                     f   f   fdZ	 	 	 d+deeee   f   d ed!efd"Zd# Zd'd$efd%Zd& Z xZS ),r   zC Transformer in Transformer - https://arxiv.org/abs/2103.00112
    r   r   r3     tokeni   r   rP   r4   rQ   FrG   c                 h   t         |           |dv sJ || _        || _        |x| _        x| _        | _        d| _        d| _        t        ||||||      | _
        | j                  j                  }t        | j                  d      r| j                  j                         n|}|| _        | j                  j                  }|d   |d   z  } |||z        | _        t!        j"                  ||z  |      | _         ||      | _        t!        j(                  t+        j,                  dd|            | _        t!        j(                  t+        j,                  d|dz   |            | _        t!        j(                  t+        j,                  d||d   |d               | _        t!        j4                  |      | _        t+        j8                  d||      D cg c]  }|j;                          }}g }t=        |      D ]+  }|j?                  tA        ||||	|
||||||   ||             - t!        jB                  |      | _"        t=        |      D cg c]  }tG        d	| ||
       c}| _$         ||      | _%        t!        j4                  |      | _&        |dkD  rt!        j"                  ||      nt!        jN                         | _(        tS        | j.                  d       tS        | j0                  d       tS        | j2                  d       | jU                  | jV                         y c c}w c c}w )N r   avgr   F)r|   r}   r   r   rz   r]   r   r   )p)r-   rf   rg   rh   ri   rj   r.   r+   r)   rc   rk   r]   zblocks.)modulenum_chs	reduction{Gz?std),r   r   num_classesglobal_poolnum_featureshead_hidden_size	embed_dimnum_prefix_tokensgrad_checkpointingru   rp   r~   hasattrr   r   r^   r$   r%   r*   r_   	Parameterrn   zeros	cls_token	patch_posr   r(   pos_droplinspaceitemrangeappendrO   
ModuleListblocksdictfeature_infonorm	head_droprb   headr   apply_init_weights)r,   r|   r}   r   r   r   r   	inner_dimdepthnum_heads_innernum_heads_outerrj   r.   	drop_ratepos_drop_rateproj_drop_rateattn_drop_ratedrop_path_raterk   first_strider]   r~   rr   rg   r>   dprr   ir/   s                                r0   r   zTNT.__init__   s   . 	2222&&ENNND1DN!""'%!
 &&22-4T5E5E|-TD'')Zd&))88"1%q(99	$Y%:;IIi)3Y?	$Y/ekk!Q	&BCekk![1_i&PQekk!Yq@QSabcSd&ef

]3!&>5!IJAqvvxJJu 	AMM%!#,-#!((a&% 	 mmF+PUV[P\^KLD'!yAF^ y)	I.9DqBIIi5bkkm	dnn#.dnn#.dnn#.

4%%&9 K$^s   L*L/c                    t        |t        j                        rjt        |j                  d       t        |t        j                        r8|j
                  +t        j                  j                  |j
                  d       y y y t        |t        j                        rUt        j                  j                  |j
                  d       t        j                  j                  |j                  d       y y )Nr   r   r   g      ?)	
isinstancer$   r%   r   weightr   init	constant_rs   )r,   ms     r0   r   zTNT._init_weights2  s    a#!((,!RYY'AFF,>!!!&&!, -?'2<<(GGaffa(GGahh, )r1   c                 
    h dS )N>   r   r   r    r,   s    r0   no_weight_decayzTNT.no_weight_decay;  s    66r1   c                 $    t        dddg      }|S )Nz=^cls_token|patch_pos|pixel_pos|pixel_embed|norm[12]_proj|proj)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )r,   coarsematchers      r0   group_matcherzTNT.group_matcher?  s!    Q)$
 r1   c                     || _         y r   )r   )r,   enables     r0   set_grad_checkpointingzTNT.set_grad_checkpointingJ  s
    "(r1   r   c                     | j                   S r   )r   r   s    r0   get_classifierzTNT.get_classifierN  s    yyr1   r   r   c                     || _         ||dv sJ || _        |dkD  r&t        j                  | j                  |      | _        y t        j
                         | _        y )Nr   r   )r   r   r$   r%   r   rb   r   )r,   r   r   s      r0   reset_classifierzTNT.reset_classifierR  sS    &""6666*D>IAoBIIdnnk:	SUS^S^S`	r1   r>   indicesreturn_prefix_tokensr   
stop_early
output_fmtintermediates_onlyc                    |dv sJ d       |dk(  }g }	t        t        | j                        |      \  }
}|j                  \  }}}}| j	                  || j
                        }| j                  | j                  | j                  |j                  || j                  d                        }t        j                  | j                  j                  |dd      |fd      }|| j                  z   }| j!                  |      }t        j"                  j%                         s|s| j                  }n| j                  d|dz    }t'        |      D ]u  \  }}| j(                  r/t        j"                  j%                         st+        |||      \  }}n |||      \  }}||
v sR|	j-                  |r| j/                  |      n|       w | j0                  rD|	D cg c]  }|ddd| j0                  f    }}|	D cg c]  }|dd| j0                  df    }	}|ra| j                  j3                  ||f      \  }}|	D cg c]6  }|j                  |||d      j5                  dd	dd
      j7                         8 }	}t        j"                  j%                         s|rt9        t;        |	            }	|r|	S | j/                  |      }||	fS c c}w c c}w c c}w )a<   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if an int, if is a sequence, select by matching indices
            return_prefix_tokens: Return both prefix and spatial intermediate tokens
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )NCHWNLCz)Output format must be one of NCHW or NLC.r   r5   r   r7   Nr   r3   r   )r   lenr   r8   rp   r   r_   r*   r^   r9   r~   rn   ro   r   expandr   r   jitis_scripting	enumerater   r   r   r   r   r   r:   
contiguouslistzip)r,   r>   r   r   r   r   r   r   r9   intermediatestake_indices	max_indexr?   _heightwidthrp   rq   r   r   blkyprefix_tokensr   r   s                            r0   forward_intermediateszTNT.forward_intermediatesY  s   . _,Y.YY,&"6s4;;7G"Qi  gg1fe&&q$..9oodii@S@STUW[WgWgik@l0m&noii!6!6q"b!A; OUVW!DNN2mmK099!!#:[[F[[)a-0F' 	VFAs&&uyy/E/E/G+5c;+T([+.{K+H([L $$tTYY{%;U	V !!ERSQq!D$:$:"::;SMSDQRqQq$"8"8"99:RMR##55vuoFDAq^klYZQYYq!Q3;;Aq!QGRRTlMlyy%%',@ ]M!BCM  ii,M))! TR
 ms   K4K8;K
prune_norm
prune_headc                     t        t        | j                        |      \  }}| j                  d|dz    | _        |rt        j                         | _        |r| j                  dd       |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r   )r   r   r   r$   rb   r   r   )r,   r   r   r   r   r   s         r0   prune_intermediate_layerszTNT.prune_intermediate_layers  s]     #7s4;;7G"Qikk.9q=1DI!!!R(r1   c                 j   |j                   d   }| j                  || j                        }| j                  | j	                  | j                  |j                  || j                  d                        }t        j                  | j                  j                  |dd      |fd      }|| j                  z   }| j                  |      }| j                  D ]I  }| j                  r/t        j                   j#                         st%        |||      \  }}> |||      \  }}K | j'                  |      }|S )Nr   r5   r   r7   )r8   rp   r   r_   r*   r^   r9   r~   rn   ro   r   r   r   r   r   r   r   r   r   r   )r,   r>   r?   rp   rq   r   s         r0   forward_featureszTNT.forward_features  s	   GGAJ&&q$..9oodii@S@STUW[WgWgik@l0m&noii!6!6q"b!A; OUVW!DNN2mmK0;; 	IC&&uyy/E/E/G+5c;+T([+.{K+H([		I ii,r1   
pre_logitsc                     | j                   r=| j                   dk(  r%|d d | j                  d f   j                  d      n|d d df   }| j                  |      }|r|S | j	                  |      S )Nr   r   r7   r   )r   r   meanr   r   )r,   r>   r  s      r0   forward_headzTNT.forward_head  sq    =A=M=MQV=V!T++,,-22q29\]^_ab^b\cANN1q0DIIaL0r1   c                 J    | j                  |      }| j                  |      }|S r   )r  r  )r,   r>   s     r0   rE   zTNT.forward  s'    !!!$a r1   Fr   r   )NFFFr   F)r   FT)rH   rI   rJ   rK   r$   rs   r   r   rn   r   ignorer   r   r   Moduler   r[   r   strr   r   r   r   boolr   r   r  r  r  rE   rL   rM   s   @r0   r   r      s   
 ||+R'h- YY7 7 YY  YY) ) YY		  aC ahsm a 8<).$$',G*||G* eCcN34G* #'	G*
 G* G* G* !%G* 
tELL!5tELL7I)I#JJ	KG*V ./$#	3S	>*  	 $1$ 1r1   c                 8    | ddd dddt         t        dddd	d
d|S )Nr   )r3   r   r   g?bicubicTzpixel_embed.projr   zarXiv:2103.00112zTransformer in TransformerzMhttps://github.com/huawei-noah/Efficient-AI-Backbones/tree/master/tnt_pytorch)urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer  r   
first_conv
classifier	paper_ids
paper_name
origin_urlr   )r  kwargss     r0   _cfgr    s;    =t'0F('2e
 
 
r1   ztimm/)	hf_hub_id)ztnt_s_legacy_patch16_224.in1kztnt_s_patch16_224.in1kztnt_b_patch16_224.in1kc                    | j                  dd        d| v r| }nxi }| j                         D ]b  \  }}|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd	      }|j                  d
d      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|j                  dd      }|dk(  rh|j                  j                  dk(  rO|j
                  \  }}}t        |dz        x}}	||	z  |k(  sJ |j                  ddd       j                  ||||	      }|||<   e 	 |d   j
                  |j                  j
                  k7  r(t        |d   |j                  j                  d !      |d<   |S )"Nouter_tokensr   	outer_pos	inner_posr   rq   rp   
proj_norm1r^   
proj_norm2r_   inner_norm1rX   
inner_attnrY   inner_norm2rZ   	inner_mlpr\   outer_norm1r`   
outer_attnra   outer_norm2rd   	outer_mlpre   Fg      ?r   r   r   )new_sizer   )popitemsreplacerp   r]   r8   r[   r:   r9   r   r   r{   )

state_dictmodelout_dictrC   r'   r?   r@   rA   r   r   s
             r0   checkpoint_filter_fnr4    s   NN>4(j $$& 	DAq		+{3A		+{3A		-7A		,5A		,5A		-3A		,	2A		-7A		+x0A		-4A		,
3A		-4A		+u-AKE$5$5$<$<$E''1aAH%A1uz!zIIaA&..q!Q:HQK'	* U""eoo&;&;; 6[!&&00!

 Or1   c                 r    |j                  dd      }t        t        | |ft        t	        |d      d|}|S )Nout_indicesr3   getter)r6  feature_cls)pretrained_filter_fnfeature_cfg)r.  r   r   r4  r   )variant
pretrainedr  r6  r2  s        r0   _create_tntr=    sF    **]A.K Wj1[hG 	E
 Lr1   r   c           	      X    t        ddddddd      }t        d
d	| it        |fi |}|S )Nr        rP      FT)r}   r   r   r   r   r.   r]   r<  )tnt_s_legacy_patch16_224r   r=  r<  r  	model_cfgr2  s       r0   rB  rB    sA    "at%I gzgTR[Mf_eMfgELr1   c           	      V    t        dddddd      }t        d	d| it        |fi |}|S )
Nr   r?  r@  rP   rA  Fr}   r   r   r   r   r.   r<  )tnt_s_patch16_224rC  rD  s       r0   rH  rH  &  s>    "aI `
`d9F_X^F_`ELr1   c           	      V    t        dddddd      }t        d	d| it        |fi |}|S )
Nr   i  (   rP   
   FrG  r<  )tnt_b_patch16_224rC  rD  s       r0   rL  rL  /  s>    "bI `
`d9F_X^F_`ELr1   )r   r
  )*rK   r   typingr   r   r   r   rn   torch.nnr$   	timm.datar   r	   timm.layersr
   r   r   r   r   r   _builderr   	_featuresr   _manipulater   	_registryr   r   __all__r  r   rO   ru   r   r  default_cfgsr4  r=  rB  rH  rL  r   r1   r0   <module>rW     s  
  / /   E ` ` * + # <'		 DY(BII Y(x= =@s")) sl %%)& # #&  "J C   S   S  r1   