
    kho                     :   d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 	 ddlmZ ddlZddlmZ ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZ d	d
lmZ d	dl m!Z! d	dl"m#Z#m$Z$ d	dl%m&Z&m'Z' d	dl(m)Z) dgZ* ejV                  e,      Z- G d dej\                        Z/ G d dej\                        Z0 G d dej\                        Z1 G d dej\                        Z2 G d dej\                        Z3d:dZ4d;dZ5 e& e5ddd       e5dd !       e5d"d#       e5d$d#       e5d%d#       e5d&d#       e5d'd#       e5d(d#       e5        e5d)d#       e5        e5d*d#       e5       d+      Z6e'd:d,e3fd-       Z7e'd:d,e3fd.       Z8e'd:d,e3fd/       Z9e'd:d,e3fd0       Z:e'd:d,e3fd1       Z;e'd:d,e3fd2       Z<e'd:d,e3fd3       Z=e'd:d,e3fd4       Z>e'd:d,e3fd5       Z?e'd:d,e3fd6       Z@e'd:d,e3fd7       ZAe'd:d,e3fd8       ZBe'd:d,e3fd9       ZCy# e$ r
 ddlmZ Y w xY w)<z Relative Position Vision Transformer (ViT) in PyTorch

NOTE: these models are experimental / WIP, expect changes

Hacked together by / Copyright 2022, Ross Wightman
    N)partial)ListOptionalTupleTypeUnion)Literal)FinalIMAGENET_INCEPTION_MEANIMAGENET_INCEPTION_STD)
PatchEmbedMlpDropPath	RelPosMlp
RelPosBiasuse_fused_attn	LayerType   )build_model_with_cfg)feature_take_indices)named_apply
checkpoint)generate_default_cfgsregister_model)get_init_weights_vitVisionTransformerRelPosc                   |     e Zd ZU ee   ed<   ddddddej                  f fd	Zd	de	e
j                     fdZ xZS )
RelPosAttention
fused_attn   FN        c	                 ^   t         	|           ||z  dk(  sJ d       || _        ||z  | _        | j                  dz  | _        t               | _        t        j                  ||dz  |      | _	        |r || j                        nt        j                         | _        |r || j                        nt        j                         | _        |r	 ||      nd | _        t        j                  |      | _        t        j                  ||      | _        t        j                  |      | _        y )Nr   z$dim should be divisible by num_headsg         )bias	num_heads)super__init__r'   head_dimscaler   r    nnLinearqkvIdentityq_normk_normrel_posDropout	attn_dropproj	proj_drop)
selfdimr'   qkv_biasqk_normrel_pos_clsr4   r6   
norm_layer	__class__s
            a/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/models/vision_transformer_relpos.pyr)   zRelPosAttention.__init__%   s     	Y!#K%KK#"y(]]d*
(*99S#'93:j/3:j/;F{Y7DI.IIc3'	I.    shared_rel_posc                    |j                   \  }}}| j                  |      j                  ||d| j                  | j                        j                  ddddd      }|j                  d      \  }}}	| j                  |      }| j                  |      }| j                  r| j                  | j                  j                         }
n||}
nd }
t        j                  j                  j                  |||	|
| j                   r| j"                  j$                  nd      }ns|| j&                  z  }||j)                  dd	      z  }| j                  | j                  ||
      }n|||z   }|j+                  d	      }| j#                  |      }||	z  }|j)                  dd      j                  |||      }| j-                  |      }| j/                  |      }|S )Nr$      r   r      r"   )	attn_mask	dropout_pr@   r8   )shaper.   reshaper'   r*   permuteunbindr0   r1   r    r2   get_biastorchr,   
functionalscaled_dot_product_attentiontrainingr4   pr+   	transposesoftmaxr5   r6   )r7   xr@   BNCr.   qkv	attn_biasattns               r>   forwardzRelPosAttention.forward?   s   ''1ahhqk!!!Q4>>4==IQQRSUVXY[\^_`**Q-1aKKNKKN??||' LL113	+*	 	##@@1a#.2mm$..** A A DJJAq{{2r**D||'||D|H+n,<<B<'D>>$'DqAKK1%%aA.IIaLNN1r?   N)__name__
__module____qualname__r
   bool__annotations__r,   	LayerNormr)   r   rO   Tensorr_   __classcell__r=   s   @r>   r   r   "   sB    d
 ||/4"%,,)? "r?   r   c                   &     e Zd Zd fd	Zd Z xZS )
LayerScalec                     t         |           || _        t        j                  |t        j                  |      z        | _        y r`   )r(   r)   inplacer,   	ParameterrO   onesgamma)r7   r8   init_valuesrm   r=   s       r>   r)   zLayerScale.__init__e   s2    \\+

3"?@
r?   c                 n    | j                   r|j                  | j                        S || j                  z  S r`   )rm   mul_rp   r7   rV   s     r>   r_   zLayerScale.forwardj   s(    %)\\qvvdjj!Eq4::~Er?   )gh㈵>F)ra   rb   rc   r)   r_   rh   ri   s   @r>   rk   rk   d   s    A
Fr?   rk   c            
            e Zd Zddddddddej                  ej
                  f
 fd	Zddeej                     fdZ
 xZS )	RelPosBlock      @FNr"   c           	         t         |            ||      | _        t        ||||||	|      | _        |rt        ||      nt        j                         | _        |
dkD  rt        |
      nt        j                         | _
         ||      | _        t        |t        ||z        ||      | _        |rt        ||      nt        j                         | _        |
dkD  rt        |
      | _        y t        j                         | _        y )Nr9   r:   r;   r4   r6   )rq   r"   in_featureshidden_features	act_layerdrop)r(   r)   norm1r   r^   rk   r,   r/   ls1r   
drop_path1norm2r   intmlpls2
drop_path2r7   r8   r'   	mlp_ratior9   r:   r;   rq   r6   r4   	drop_pathr}   r<   r=   s                r>   r)   zRelPosBlock.__init__p   s     	_
##
	 @K:c{;PRP[P[P]1:R(9-R[[]_
i0	
 @K:c{;PRP[P[P]1:R(9-R[[]r?   r@   c           
         || j                  | j                  | j                  | j                  |      |                  z   }|| j	                  | j                  | j                  | j                  |                        z   }|S NrH   )r   r   r^   r   r   r   r   r   r7   rV   r@   s      r>   r_   zRelPosBlock.forward   sf    4::a=Q_)` abb$**Q-)@ ABBr?   r`   )ra   rb   rc   r,   GELUrf   r)   r   rO   rg   r_   rh   ri   s   @r>   rv   rv   n   sG     gg||&SP%,,)? r?   rv   c            
            e Zd Zddddddddej                  ej
                  f
 fd	Zd Zd	dee	j                     fdZ xZS )
ResPostRelPosBlockrw   FNr"   c           	         t         |           || _        t        ||||||	|      | _         ||      | _        |
dkD  rt        |
      nt        j                         | _	        t        |t        ||z        ||      | _         ||      | _        |
dkD  rt        |
      nt        j                         | _        | j                          y )Nry   r"   rz   )r(   r)   rq   r   r^   r   r   r,   r/   r   r   r   r   r   r   init_weightsr   s                r>   r)   zResPostRelPosBlock.__init__   s     	&##
	  _
1:R(9-R[[]i0	
  _
1:R(9-R[[]r?   c                    | j                   }t        j                  j                  | j                  j
                  | j                          t        j                  j                  | j                  j
                  | j                          y y r`   )rq   r,   init	constant_r   weightr   r7   s    r>   r   zResPostRelPosBlock.init_weights   s[    'GGdjj//1A1ABGGdjj//1A1AB (r?   r@   c           	          || j                  | j                  | j                  ||                  z   }|| j                  | j	                  | j                  |                  z   }|S r   )r   r   r^   r   r   r   r   s      r>   r_   zResPostRelPosBlock.forward   sT    

499Q~9+V WXX

488A; 788r?   r`   )ra   rb   rc   r,   r   rf   r)   r   r   rO   rg   r_   rh   ri   s   @r>   r   r      sL     gg||'RC%,,)? r?   r   c            8       n    e Zd ZdZddddddddd	d
dddddddddddddeddefdeeeeef   f   deeeeef   f   dedede	d   dededede
dededee
   deded ed!ee   d"ed#e
d$e
d%e
d&e
d'e	d(   d)ed*eej                      d+ee   d,ee   d-eej                      f6 fd.ZdFd/Zd0 Zej,                  j.                  d1        Zej,                  j.                  dGd2       Zej,                  j.                  dHd3       Zej,                  j.                  d4ej                   fd5       ZdIdedee   fd6Z	 	 	 	 	 	 dJd7ej:                  d8eeeee   f      d9ed:ed;ed<ed=ed4eeej:                     eej:                  eej:                     f   f   fd>Z	 	 	 dKd8eeee   f   d?ed@efdAZ dB Z!dGdCefdDZ"dE Z# xZ$S )Lr   ah   Vision Transformer w/ Relative Position Bias

    Differing from classic vit, this impl
      * uses relative position index (swin v1 / beit) or relative log coord + mlp (swin v2) pos embed
      * defaults to no class token (can be enabled)
      * defaults to global avg pool for head (can be changed)
      * layer-scale (residual branch gain) enabled
          r$     avg      rw   TFư>r   Nr"   skipimg_size
patch_sizein_chansnum_classesglobal_pool) r   tokenmap	embed_dimdepthr'   r   r9   r:   rq   class_tokenfc_normrel_pos_typerel_pos_dimr@   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rateweight_init)r   jaxmocor   fix_initembed_layerr<   r}   block_fnc                    t         #|           |dv sJ |s|dk7  sJ |xs t        t        j                  d      }|xs t        j
                  }|| _        || _        |x| _        x| _	        | _
        |rdnd| _        d| _         |||||      | _        | j                  j                  }t        | j                  d	      r| j                  j!                         n|}t#        || j                  
      }|j%                  d      r!|r||d<   d|v rd|d<   t        t&        fi |}nt        t(        fi |}d| _        |r ||      | _        d}|r4t        j,                  t/        j0                  d| j                  |            nd| _        t/        j4                  d||      D  cg c]  } | j7                          }!} t        j8                  t;        |      D "cg c]  }" ||||	|
||||||!|"   ||       c}"      | _        t;        |      D "cg c]  }"t#        d|" ||       c}"| _        |s ||      nt        j@                         | _!        |r ||      nt        j@                         | _"        t        jF                  |      | _$        |dkD  r t        jJ                  | j                  |      nt        j@                         | _&        |dk7  r| jO                  |       |r| jQ                          yyc c} w c c}"w c c}"w )aE  
        Args:
            img_size: input image size
            patch_size: patch size
            in_chans: number of input channels
            num_classes: number of classes for classification head
            global_pool: type of global pooling for final sequence (default: 'avg')
            embed_dim: embedding dimension
            depth: depth of transformer
            num_heads: number of attention heads
            mlp_ratio: ratio of mlp hidden dim to embedding dim
            qkv_bias: enable bias for qkv if True
            qk_norm: Enable normalization of query and key in attention
            init_values: layer-scale init values
            class_token: use class token (default: False)
            fc_norm: use pre classifier norm instead of pre-pool
            rel_pos_type: type of relative position
            shared_rel_pos: share relative pos across all blocks
            drop_rate: dropout rate
            proj_drop_rate: projection dropout rate
            attn_drop_rate: attention dropout rate
            drop_path_rate: stochastic depth rate
            weight_init: weight init scheme
            fix_init: apply weight initialization fix (scaling w/ layer index)
            embed_layer: patch embedding layer
            norm_layer: normalization layer
            act_layer: MLP activation layer
        r   r   r   r   r   )epsr   r   F)r   r   r   r   
feat_ratio)window_sizeprefix_tokensr   
hidden_dimswinmodeNr&   )r8   r'   r   r9   r:   r;   rq   r6   r4   r   r<   r}   zblocks.)modulenum_chs	reductionr   ))r(   r)   r   r,   rf   r   r   r   num_featureshead_hidden_sizer   num_prefix_tokensgrad_checkpointingpatch_embed	grid_sizehasattrr   dict
startswithr   r   r@   rn   rO   zeros	cls_tokenlinspaceitem
ModuleListrangeblocksfeature_infor/   normr   r3   	head_dropr-   headr   fix_init_weight)$r7   r   r   r   r   r   r   r   r'   r   r9   r:   rq   r   r   r   r   r@   r   r   r   r   r   r   r   r<   r}   r   	feat_sizerrel_pos_argsr;   rV   dprir=   s$                                      r>   r)   z VisionTransformerRelPos.__init__   s   t 	2222kW444B72<<T#B
(	&&ENNND1DN&1q"'&!	
 $$..	-4T5E5E|-TD'')Zd	AWAWX""5)-8\*%'-V$!)<|<K!*==K""-	"BDK\gekk!T5K5KY&WXmq!&>5!IJAqvvxJJmm 5\%#  ##!''((a&%#%# $" QVV[P\^KLD'!yAF^18Jy)bkkm	 18z),R[[]I.>IAoBIIdnnk:SUS^S^S`	& k*  " 9 K%# ^s   (K<LLc                     |dv sJ | j                   +t        j                  j                  | j                   d       t	        t        |      |        y )N)r   r   r   r   )std)r   r,   r   normal_r   r   )r7   r   s     r>   r   z$VisionTransformerRelPos.init_weights^  sC    ****>>%GGOODNNO5(.5r?   c                    d }t        | j                        D ]m  \  }} ||j                  j                  j                  j
                  |dz           ||j                  j                  j                  j
                  |dz          o y )Nc                 R    | j                  t        j                  d|z               y )Ng       @)div_mathsqrt)param	_layer_ids     r>   rescalez8VisionTransformerRelPos.fix_init_weight.<locals>.rescalee  s    JJtyyy12r?   r   )	enumerater   r^   r5   r   datar   fc2)r7   r   layer_idlayers       r>   r   z'VisionTransformerRelPos.fix_init_weightd  si    	3  )5 	=OHeEJJOO**//A>EIIMM((--x!|<	=r?   c                     dhS )Nr    r   s    r>   no_weight_decayz'VisionTransformerRelPos.no_weight_decayl  s
    }r?   c                      t        dddg      S )Nz^cls_token|patch_embed)z^blocks\.(\d+)N)z^norm)i )stemr   )r   )r7   coarses     r>   group_matcherz%VisionTransformerRelPos.group_matcherp  s    *-/CD
 	
r?   c                     || _         y r`   )r   )r7   enables     r>   set_grad_checkpointingz.VisionTransformerRelPos.set_grad_checkpointingw  s
    "(r?   returnc                     | j                   S r`   )r   r   s    r>   get_classifierz&VisionTransformerRelPos.get_classifier{  s    yyr?   c                     || _         ||dv sJ || _        |dkD  r&t        j                  | j                  |      | _        y t        j
                         | _        y )Nr   r   )r   r   r,   r-   r   r/   r   )r7   r   r   s      r>   reset_classifierz(VisionTransformerRelPos.reset_classifier  sS    &""6666*D>IAoBIIdnnk:	SUS^S^S`	r?   rV   indicesreturn_prefix_tokensr   
stop_early
output_fmtintermediates_onlyc           	      4   |dv sJ d       |dk(  }g }	t        t        | j                        |      \  }
}|j                  \  }}}}| j	                  |      }| j
                  At        j                  | j
                  j                  |j                  d   dd      |fd      }| j                  | j                  j                         nd}t        j                  j                         s|s| j                  }n| j                  d|dz    }t        |      D ]q  \  }}| j                  r-t        j                  j                         st        |||	      }n
 |||	      }||
v sN|	j!                  |r| j#                  |      n|       s | j$                  rD|	D cg c]  }|ddd| j$                  f    }}|	D cg c]  }|dd| j$                  df    }	}|ra| j                  j'                  ||f      \  }}|	D cg c]6  }|j)                  |||d      j+                  dd
dd      j-                         8 }	}t        j                  j                         s|rt/        t1        |	            }	|r|	S | j#                  |      }||	fS c c}w c c}w c c}w )a=   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            return_prefix_tokens: Return both prefix and spatial intermediate tokens
            norm: Apply norm layer to all intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )NCHWNLCz)Output format must be one of NCHW or NLC.r  Nr   rG   r   rI   rH   r$   rB   )r   lenr   rJ   r   r   rO   catexpandr@   rN   jitis_scriptingr   r   r   appendr   r   dynamic_feat_sizerK   rL   
contiguouslistzip)r7   rV   r   r   r   r   r  r  rK   intermediatestake_indices	max_indexrW   _heightwidthr@   r   r   blkyr   HWs                           r>   forward_intermediatesz-VisionTransformerRelPos.forward_intermediates  si   . _,Y.YY,&"6s4;;7G"Qi  gg1feQ>>%		4>>00RDaHaPA;?;N;N;Z,,557`d99!!#:[[F[[)a-0F' 	BFAs&&uyy/E/E/GsAnE.9L $$TTYYq\qA	B !!ERSQq!D$:$:"::;SMSDQRqQq$"8"8"99:RMR##55vuoFDAq^klYZQYYq!Q3;;Aq!QGRRTlMlyy%%',@ ]M!BCM  IIaL- TR ms   J<J ;J
prune_norm
prune_headc                    t        t        | j                        |      \  }}| j                  d|dz    | _        |rt        j                         | _        |r+t        j                         | _        | j                  dd       |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r   )r   r  r   r,   r/   r   r   r   )r7   r   r  r  r  r  s         r>   prune_intermediate_layersz1VisionTransformerRelPos.prune_intermediate_layers  sj     #7s4;;7G"Qikk.9q=1DI;;=DL!!!R(r?   c                    | j                  |      }| j                  At        j                  | j                  j	                  |j
                  d   dd      |fd      }| j                  | j                  j                         nd }| j                  D ]E  }| j                  r-t        j                  j                         st        |||      }< |||      }G | j                  |      }|S )Nr   rG   r   rI   rH   )r   r   rO   r  r  rJ   r@   rN   r   r   r	  r
  r   r   )r7   rV   r@   r  s       r>   forward_featuresz(VisionTransformerRelPos.forward_features  s    Q>>%		4>>00RDaHaPA;?;N;N;Z,,557`d;; 	:C&&uyy/E/E/GsAnE.9		:
 IIaLr?   
pre_logitsc                    | j                   r=| j                   dk(  r%|d d | j                  d f   j                  d      n|d d df   }| j                  |      }| j	                  |      }|r|S | j                  |      S )Nr   r   rI   r   )r   r   meanr   r   r   )r7   rV   r!  s      r>   forward_headz$VisionTransformerRelPos.forward_head  s~    =A=M=MQV=V!T++,,-22q29\]^_ab^b\cALLONN1q0DIIaL0r?   c                 J    | j                  |      }| j                  |      }|S r`   )r   r$  rt   s     r>   r_   zVisionTransformerRelPos.forward  s'    !!!$a r?   r   F)Tr`   )NFFFr  F)r   FT)%ra   rb   rc   __doc__r   rv   r   r   r   r	   floatrd   r   strr   r,   Moduler   r)   r   r   rO   r	  ignorer   r   r   r   r   rg   r   r  r  r   r$  r_   rh   ri   s   @r>   r   r      s    5868#>C !!!+/ %! %)-#(!$&$&$&>D"+5.2-1(39}#CsCx01}# c5c?23}# 	}#
 }# !!:;}# }# }# }# }# }# }# "%}# }# }#  !}#" "##}#$ !%}#& '}#( ")}#* "+}#, "-}#. !!:;/}#0 1}#2 bii3}#4 !+5}#6  	*7}#8 299o9}#~6= YY  YY
 
 YY) ) YY		  aC ahsm a 8<).$$',B ||B  eCcN34B  #'	B 
 B  B  B  !%B  
tELL!5tELL7I)I#JJ	KB L ./$#	3S	>*  	"1$ 1r?   c                 h    |j                  dd      }t        t        | |fdt        |d      i|}|S )Nout_indicesr$   feature_cfggetter)r.  feature_cls)popr   r   r   )variant
pretrainedkwargsr.  models        r>   !_create_vision_transformer_relposr7    sC    **]A.K *[hG E
 Lr?   r   c                 2    | ddd dddt         t        ddd|S )	Nr   )r$   r   r   g?bicubicTzpatch_embed.projr   )urlr   
input_size	pool_sizecrop_pctinterpolationfixed_input_sizer#  r   
first_conv
classifierr   )r:  r5  s     r>   _cfgrB     s2    =t'0F(  r?   zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_replos_base_patch32_plus_rpn_256-sw-dd486f51.pthztimm/)r$      rC  )r:  	hf_hub_idr;  )r$      rE  )r:  r;  zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_small_patch16_224-sw-ec2778b4.pth)r:  rD  zhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_224-sw-11c174af.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_base_patch16_224-sw-49049aed.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_srelpos_small_patch16_224-sw-6cdb8849.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_srelpos_medium_patch16_224-sw-ad702b8c.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_cls_224-sw-cfe8e259.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_base_patch16_gapcls_224-sw-1a341d6c.pthzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-tpu-weights/vit_relpos_medium_patch16_rpn_224-sw-5d2befd8.pth)z,vit_relpos_base_patch32_plus_rpn_256.sw_in1kz*vit_relpos_base_patch16_plus_240.untrainedz$vit_relpos_small_patch16_224.sw_in1kz%vit_relpos_medium_patch16_224.sw_in1kz#vit_relpos_base_patch16_224.sw_in1kz%vit_srelpos_small_patch16_224.sw_in1kz&vit_srelpos_medium_patch16_224.sw_in1kz)vit_relpos_medium_patch16_cls_224.sw_in1kz)vit_relpos_base_patch16_cls_224.untrainedz*vit_relpos_base_patch16_clsgap_224.sw_in1kz*vit_relpos_small_patch16_rpn_224.untrainedz)vit_relpos_medium_patch16_rpn_224.sw_in1kz)vit_relpos_base_patch16_rpn_224.untrainedr   c           	      ^    t        ddddt              }t        	 dd| it        |fi |}|S )z` ViT-Base (ViT-B/32+) w/ relative log-coord position and residual post-norm, no class token
          r      )r   r   r   r'   r   r4  )$vit_relpos_base_patch32_plus_rpn_256r   r   r7  r4  r5  
model_argsr6  s       r>   rJ  rJ  3  sG     s"UghJ-.e;EeIMjIc\bIceELr?   c           	      T    t        dddd      }t        	 dd| it        |fi |}|S )zI ViT-Base (ViT-B/16+) w/ relative log-coord position, no class token
    r   rH  r   rI  )r   r   r   r'   r4  ) vit_relpos_base_patch16_plus_240r   r7  rL  s       r>   rO  rO  =  sD     s"KJ-*a7AaEI*E_X^E_aELr?   c           	      X    t        dddddd      }t        	 d	d| it        |fi |}|S )
H ViT-Base (ViT-B/16) w/ relative log-coord position, no class token
    r     r      FTr   r   r   r'   r9   r   r4  )vit_relpos_small_patch16_224rP  rL  s       r>   rV  rV  G  sJ     s"TYcghJ-&]3=]AEjA[TZA[]ELr?   c           	      X    t        dddddd      }t        	 d	d| it        |fi |}|S )
rR  r      r   r!   FTrU  r4  )vit_relpos_medium_patch16_224rP  rL  s       r>   rY  rY  Q  sM     B!eUY[J-'^4>^BFzB\U[B\^ELr?   c           	      X    t        dddddd      }t        	 dd| it        |fi |}|S )	rR  r   r   r   FTrU  r4  )vit_relpos_base_patch16_224rP  rL  s       r>   r[  r[  \  sM     B"uVZ\J-%\2<\@DZ@ZSY@Z\ELr?   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
O ViT-Base (ViT-B/16) w/ shared relative log-coord position, no class token
    r   rS  r   rT  FTr   r   r   r'   r9   r   r   r@   r4  )vit_srelpos_small_patch16_224rP  rL  s       r>   r_  r_  g  sS     B!eUZ.J .'^4>^BFzB\U[B\^ELr?   c           
      \    t        dddddddd      }t        	 d	d| it        |fi |}|S )
r]  r   rX  r   r!   FTr^  r4  )vit_srelpos_medium_patch16_224rP  rL  s       r>   ra  ra  s  sS     B!eUZ.J .(_5?_CG
C]V\C]_ELr?   c                 ^    t        ddddddddd		      }t        	 dd
| it        |fi |}|S )zM ViT-Base (ViT-M/16) w/ relative log-coord position, class token present
    r   rX  r   r!   FrC  Tr   )	r   r   r   r'   r9   r   r   r   r   r4  )!vit_relpos_medium_patch16_cls_224rP  rL  s       r>   rc  rc    sV     B!eUZTw@J .+b8BbFJ:F`Y_F`bELr?   c           	      Z    t        ddddddd      }t        	 d	d| it        |fi |}|S )
zM ViT-Base (ViT-B/16) w/ relative log-coord position, class token present
    r   r   r   FTr   )r   r   r   r'   r9   r   r   r4  )vit_relpos_base_patch16_cls_224rP  rL  s       r>   re  re    sP     B"uZ^lsuJ-)`6@`DHD^W]D^`ELr?   c           	      Z    t        ddddddd      }t        	 dd| it        |fi |}|S )	a   ViT-Base (ViT-B/16) w/ relative log-coord position, class token present
    NOTE this config is a bit of a mistake, class token was enabled but global avg-pool w/ fc-norm was not disabled
    Leaving here for comparisons w/ a future re-train as it performs quite well.
    r   r   r   FT)r   r   r   r'   r9   r   r   r4  )"vit_relpos_base_patch16_clsgap_224rP  rL  s       r>   rg  rg    sP     B"uVZhlnJ-,c9CcGKJGaZ`GacELr?   c           	      `    t        dddddt              }t        	 dd| it        |fi |}|S )	_ ViT-Base (ViT-B/16) w/ relative log-coord position and residual post-norm, no class token
    r   rS  r   rT  Fr   r   r   r'   r9   r   r4  ) vit_relpos_small_patch16_rpn_224rK  rL  s       r>   rk  rk    sM     B!eVhjJ-*a7AaEI*E_X^E_aELr?   c           	      `    t        dddddt              }t        	 dd| it        |fi |}|S )	ri  r   rX  r   r!   Frj  r4  )!vit_relpos_medium_patch16_rpn_224rK  rL  s       r>   rm  rm    sM     B!eVhjJ-+b8BbFJ:F`Y_F`bELr?   c           	      `    t        dddddt              }t        	 dd| it        |fi |}|S )ri  r   r   r   Frj  r4  )vit_relpos_base_patch16_rpn_224rK  rL  s       r>   ro  ro    sM     B"uWikJ-)`6@`DHD^W]D^`ELr?   r'  r&  )Dr(  loggingr   	functoolsr   typingr   r   r   r   r   r	   ImportErrortyping_extensionsrO   torch.nnr,   	torch.jitr
   	timm.datar   r   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   vision_transformerr   __all__	getLoggerra   _loggerr+  r   rk   rv   r   r   r7  rB  default_cfgsrJ  rO  rV  rY  r[  r_  ra  rc  re  rg  rk  rm  ro  r   r?   r>   <module>r     s#      5 5*    E c c c * + 0 < 4$
%
'

H
%?bii ?DF F-")) -`4 4n^bii ^B	 %48 X 5" 372-2X,0 P- .2 Q. ,0 O, .2 Q. /3 R/ 26 U2 2626 V3 37&15 U2 26I%& %P H_   D[   @W   AX   ?V   AX   BY   E\   CZ   	F] 	 	 D[   E\   CZ  Y  *))*s   H HH