
    kh-                         d Z ddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZmZ dd	lmZ  G d
 dej&                        Z G d dej&                        Zy)a:   Attention Pool 2D

Implementations of 2D spatial feature pooling using multi-head attention instead of average pool.

Based on idea in CLIP by OpenAI, licensed Apache 2.0
https://github.com/openai/CLIP/blob/3b473b0e682c091a9e53623eebc1ca1657385717/clip/model.py

Hacked together by / Copyright 2021 Ross Wightman
    )OptionalUnionTupleN   )use_fused_attn)	to_2tuple)resample_abs_pos_embed)apply_rot_embedRotaryEmbedding)trunc_normal_c                   F    e Zd ZU dZej
                  j                  e   ed<   	 	 	 	 	 	 	 	 	 	 dde	de
e	   dee	ee	e	f   f   de
e	   de
e	   de
e	   d	ed
edededef fdZddefdZdde
e	   de
e   fdZdej$                  de	de	dej$                  fdZddefdZ xZS )RotAttentionPool2daB   Attention based 2D feature pooling w/ rotary (relative) pos embedding.
    This is a multi-head attention based replacement for (spatial) average pooling in NN architectures.

    Adapted from the AttentionPool2d in CLIP w/ rotary embedding instead of learned embed.
    https://github.com/openai/CLIP/blob/3b473b0e682c091a9e53623eebc1ca1657385717/clip/model.py

    NOTE: While this impl does not require a fixed feature size, performance at differeing resolutions from
    train varies widely and falls off dramatically. I'm not sure if there is a way around this... -RW
    
fused_attnin_featuresout_featuresref_feat_size	embed_dimhead_dim	num_headsqkv_biasqkv_separate	pool_typeclass_token	drop_ratec                 z   t         |           |	dv sJ |xs |x| _        }|| _        |xs || _        t        |      }|||z  dk(  sJ ||z  }n||z  dk(  sJ ||z  }|| _        || _        |	j                         | _	        | j                  dz  | _
        t               | _        |
r/t        j                  t        j                   d|            | _        nd | _        |r_t        j$                  |||      | _        t        j$                  |||      | _        t        j$                  |||      | _        d | _        n t        j$                  ||dz  |      | _        t        j.                  |      | _        t        j$                  || j                        | _        t5        | j                  d|      | _        y )	N tokenr         r   bias   F)	in_pixelsref_feat_shape)super__init__r   r   r   r   r   r   lowerr   scaler   r   nn	Parametertorchzeros	cls_tokenLinearqkvqkvDropoutdropprojr   	pos_embed)selfr   r   r   r   r   r   r   r   r   r   r   	__class__s               X/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/layers/attention_pool2d.pyr&   zRotAttentionPool2d.__init__"   s    	M)))%.%=+=&(7K!-0 y(A--- I-Hx'1,,,!X-I" "*]]d*
(*\\%++a*CDDN!DNYY{IHEDFYY{IHEDFYY{IHEDFDHyyi!m(KDHJJy)	IIi):):;	(%Xef    zero_init_lastc                 0   | j                   | j                  j                  }t        | j                  j                  |dz         t
        j                  j                  | j                  j                         t        | j                  j                  |dz         t
        j                  j                  | j                  j                         t        | j                  j                  |dz         t
        j                  j                  | j                  j                         y | j                   j                  }t        | j                   j                  |dz         t
        j                  j                  | j                   j                         y Nr   )std)r2   r/   r   r   weightr)   initzeros_r!   r0   r1   r7   r;   r   s      r9   init_weightszRotAttentionPool2d.init_weightsR   s    88&&,,K$&&--[D-@AGGNN466;;'$&&--[D-@AGGNN466;;'$&&--[D-@AGGNN466;;'((..K$((//{d/BCGGNN488==)r:   num_classesc                     ||dv sJ || _         |W|dkD  r t        j                  | j                  |      nt        j                         | _        |dkD  r|n| j                  | _        y y Nr   r   r   r)   r.   r   Identityr5   r   r   r7   rD   r   s      r9   resetzRotAttentionPool2d.reset`   g     ---&DN"DORSO		$"2"2K@Y[YdYdYfDI/:QDNND #r:   xHWreturnc                     | j                   dk(  r|d d df   }|S |d d dd f   j                  |j                  d   ||d      j                  dddd      }|S Nr   r   r   r"      r   reshapeshapepermuter7   rL   rM   rN   s       r9   _poolzRotAttentionPool2d._pooli   b    >>W$!Q$A  !QR%  Q26>>q!QJAr:   
pre_logitsc                 :   |j                   \  }}}}||z  }|j                  d      j                  dd      }| j                  +t	        j
                  |j                  dd      |gd      }nAt	        j
                  | j                  j                  |j                   d   dd      |gd      }| j                  | j                  |      j                  ||dz   | j                  | j                        j                  dd      }| j                  |      j                  ||dz   | j                  | j                        j                  dd      }	| j                  |      j                  ||dz   | j                  | j                        j                  dd      }
nc| j                  |      j                  ||dz   d| j                  | j                        j                  ddddd	      }|j!                  d      \  }}	}
| j"                  j%                  ||f      \  }}t	        j
                  |d d d d d dd d f   t'        |d d d d dd d d f   ||      gd      j)                  |
      }t	        j
                  |	d d d d d dd d f   t'        |	d d d d dd d d f   ||      gd      j)                  |
      }	| j*                  r"t,        j.                  j1                  ||	|
      }n;|| j2                  z  }||	j                  d
d      z  }|j5                  d      }||
z  }|j                  dd      j                  ||dz   d      }| j7                  |      }|r| j9                  |||      }|S | j;                  |      }| j9                  |||      }|S )NrS   r   Tkeepdimdimr   rR   r"      )rV   flatten	transposer-   r+   catmeanexpandr2   r/   rU   r   r   r0   r1   rW   unbindr6   	get_embedr
   type_asr   r)   
functionalscaled_dot_product_attentionr(   softmaxr4   rY   r5   )r7   rL   r[   B_rM   rN   Nr/   r0   r1   rserceattns                 r9   forwardzRotAttentionPool2d.forwardq   s   WW
1aEIIaL""1a(>>!		166!T62A6A>A		4>>00RDaHaPA88q	!!!QUDNNDMMJTTUVXYZAq	!!!QUDNNDMMJTTUVXYZAq	!!!QUDNNDMMJTTUVXYZA##Aq1uaOWWXY[\^_abdefAhhqkGAq!>>++QF3SIIqArr1~qAqr1~sC'PQWXYaabcdIIqArr1~qAqr1~sC'PQWXYaabcd??::1aCADJJAq{{2r**D<<B<'DqAKK1%%aQ3IIaL

1a#AHIIaLJJq!Qr:   )
N   N@   NTFr   F        FNN)__name__
__module____qualname____doc__r+   jitFinalbool__annotations__intr   r   r   strfloatr&   rC   rJ   TensorrY   rt   __classcell__r8   s   @r9   r   r      s<    		%%
 +/9:'+&('+!!&$ %!.g.g #3-.g !eCHo!56	.g
  }.g sm.g  }.g .g .g .g .g .g`*4 *S# S(3- Su||    "T "r:   r   c                   F    e Zd ZU dZej
                  j                  e   ed<   	 	 	 	 	 	 	 	 	 	 dde	de
e	ee	e	f   f   dee	   dee	   dee	   dee	   d	ed
edededef fdZddefdZddee	   dee   fdZdej$                  de	de	dej$                  fdZddefdZ xZS )AttentionPool2da   Attention based 2D feature pooling w/ learned (absolute) pos embedding.
    This is a multi-head attention based replacement for (spatial) average pooling in NN architectures.

    It was based on impl in CLIP by OpenAI
    https://github.com/openai/CLIP/blob/3b473b0e682c091a9e53623eebc1ca1657385717/clip/model.py

    NOTE: This requires feature size upon construction and well prevent adaptive sizing of the network.
    r   r   	feat_sizer   r   r   r   r   r   r   r   r   c                 6   t         |           |	dv sJ |xs |x| _        }|| _        |xs || _        |||z  dk(  sJ ||z  }n||z  dk(  sJ ||z  }t        |      | _        | j                  d   | j                  d   z  | _        || _        || _	        |	| _
        | j                  dz  | _        t               | _        |
r/t        j                  t!        j"                  d|            | _        nd | _        |r_t        j&                  |||      | _        t        j&                  |||      | _        t        j&                  |||      | _        d | _        n5d x| _        x| _        | _        t        j&                  ||dz  |      | _        t        j0                  |      | _        t        j&                  || j                        | _        t        j                  t!        j"                  | j                  dz   |            | _        | j9                          y )Nr   r   r   r   r    r"   )r%   r&   r   r   r   r   r   seq_lenr   r   r   r(   r   r   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   rC   )r7   r   r   r   r   r   r   r   r   r   r   r   r8   s               r9   r&   zAttentionPool2d.__init__   s    	M)))%.%=+=&(7K y(A--- I-Hx'1,,,!X-I"9-~~a(4>>!+<<" "]]d*
(*\\%++a*CDDN!DNYY{IHEDFYY{IHEDFYY{IHEDFDH'++DF+TVdfyyi!m(KDHJJy)	IIi):):;	ekk$,,2BK&PQr:   r;   c                 d   | j                   | j                  j                  }t        | j                  j                  |dz         t
        j                  j                  | j                  j                         t        | j                  j                  |dz         t
        j                  j                  | j                  j                         t        | j                  j                  |dz         t
        j                  j                  | j                  j                         nm| j                   j                  }t        | j                   j                  |dz         t
        j                  j                  | j                   j                         t        | j                  |dz         y r=   )r2   r/   r   r   r?   r)   r@   rA   r!   r0   r1   r6   rB   s      r9   rC   zAttentionPool2d.init_weights   s    88&&,,K$&&--[D-@AGGNN466;;'$&&--[D-@AGGNN466;;'$&&--[D-@AGGNN466;;'((..K$((//{d/BCGGNN488==)dnn+*=>r:   rD   c                     ||dv sJ || _         |W|dkD  r t        j                  | j                  |      nt        j                         | _        |dkD  r|n| j                  | _        y y rF   rG   rI   s      r9   rJ   zAttentionPool2d.reset   rK   r:   rL   rM   rN   rO   c                     | j                   dk(  r|d d df   }|S |d d dd f   j                  |j                  d   ||d      j                  dddd      }|S rQ   rT   rX   s       r9   rY   zAttentionPool2d._pool   rZ   r:   r[   c                    |j                   \  }}}}||z  }|j                  d      j                  dd      }| j                  +t	        j
                  |j                  dd      |gd      }nAt	        j
                  | j                  j                  |j                   d   dd      |gd      }t        | j                  j                  d      ||fd      }||z   }| j                  | j                  |      j                  ||dz   | j                  | j                        j                  dd      }	| j!                  |      j                  ||dz   | j                  | j                        j                  dd      }
| j#                  |      j                  ||dz   | j                  | j                        j                  dd      }n`| j                  |      j                  |dd	| j                  | j                        j%                  ddd	dd
      }|j'                  d      \  }	}
}| j(                  r"t*        j,                  j/                  |	|
|      }n;|	| j0                  z  }	|	|
j                  dd      z  }|j3                  d      }||z  }|j                  dd      j                  ||dz   d      }| j5                  |      }|r| j7                  |||      }|S | j9                  |      }| j7                  |||      }|S )NrS   r   Tr]   r_   r   rR   )num_prefix_tokensr"   ra   rb   )rV   rc   rd   r-   r+   re   rf   rg   r	   r6   	unsqueezer2   r/   rU   r   r   r0   r1   rW   rh   r   r)   rk   rl   r(   rm   r4   rY   r5   )r7   rL   r[   rn   ro   rM   rN   rp   r6   r/   r0   r1   rs   s                r9   rt   zAttentionPool2d.forward   s   WW
1aEIIaL""1a(>>!		166!T62A6A>A		4>>00RDaHaPA*4>>+C+CA+FAbcd		M88q	!!!QUDNNDMMJTTUVXYZAq	!!!QUDNNDMMJTTUVXYZAq	!!!QUDNNDMMJTTUVXYZA##Ar1dnndmmLTTUVXY[\^_abcAhhqkGAq!??::1aCADJJAq{{2r**D<<B<'DqAKK1%%aQ3IIaL

1a#AHIIaLJJq!Qr:   )
ru   NNrv   NTFr   Frw   rx   ry   )rz   r{   r|   r}   r+   r~   r   r   r   r   r   r   r   r   r   r&   rC   rJ   r   rY   rt   r   r   s   @r9   r   r      s0    		%%
 67*.'+&('+!!&$ %!22 S%S/122 #3-	2
  }2 sm2  }2 2 2 2 2 2h?4 ?S# S(3- Su||    !T !r:   r   )r}   typingr   r   r   r+   torch.nnr)   configr   helpersr   r6   r	   pos_embed_sincosr
   r   weight_initr   Moduler   r    r:   r9   <module>r      sL    * )   "  - > &} }@@bii @r:   