
    kh                         d Z ddlmZmZ ddlZddlmZ 	 ddlm	Z	 dZ
	 ddlmZmZ dZ eed	      Zdadd
efdZdd
efdZd ZddZ	 	 	 d dej.                  dedeej.                     deej.                     dedej.                  fdZ	 	 	 d dej.                  dee   deej.                     deej.                     dedej.                  fdZ	 	 d!dej.                  dee   deej.                     defdZ	 	 d!dej.                  dee   deej.                     dedej.                  f
dZ	 	 d!dej.                  dee   deej.                     defdZ	 	 d!dej.                  dee   deej.                     dedej.                  f
dZ	 	 d!dej.                  dee   deej.                     defdZ 	 	 d!dej.                  dee   deej.                     dedej.                  f
dZ!y# e$ r dZ
Y w xY w# e$ r dZY w xY w)"a   'Fast' Normalization Functions

For GroupNorm and LayerNorm these functions bypass typical AMP upcast to float32.

Additionally, for LayerNorm, the APEX fused LN is used if available (which also does not upcast)

Hacked together by / Copyright 2022 Ross Wightman
    )ListOptionalN)
functional)fused_layer_norm_affineTF)fused_rms_norm_affinefused_rms_normrms_normdevicec                     	 t        j                  |       S # t        t        f$ r9 | dk(  rt        j                         cY S | dk(  sJ t        j
                         cY S w xY wNcpucuda)torchget_autocast_dtypeAttributeError	TypeErrorget_autocast_cpu_dtypeget_autocast_gpu_dtyper
   s    Q/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/layers/fast_norm.pyr   r   !   s_    2''//I& 2U?//11V###//112s    (AAAc                     	 t        j                  |       S # t        $ r9 | dk(  rt        j                         cY S | dk(  sJ t        j                         cY S w xY wr   )r   is_autocast_enabledr   is_autocast_cpu_enabledr   s    r   r   r   -   sZ    /((00 /U?0022V###,,../s    "AAAc                      t         S N_USE_FAST_NORM     r   is_fast_normr    9   s    r   c                     | a y r   r   )enables    r   set_fast_normr#   =   s    Nr   x
num_groupsweightbiasepsreturnc                 "   t         j                  j                         rt        j                  | ||||      S t        | j                  j                        rVt        | j                  j                        }| j                  |      |j                  |      ||j                  |      nd }}} t         j                  j                  | j                  j                  d      5  t        j                  | ||||      cd d d        S # 1 sw Y   y xY wNFdevice_typeenabled)r   jitis_scriptingF
group_normr   r
   typer   toampautocast)r$   r%   r&   r'   r(   dts         r   fast_group_normr8   B   s     yy||Az64==188==)  .$$r(FIIbM$BR4772;X\46			u		E >||Az64=> > >s   "DDnormalized_shapec                 L   t         j                  j                         rt        j                  | ||||      S t
        rt        | ||||      S t        | j                  j                        rVt        | j                  j                        }| j                  |      |j                  |      ||j                  |      nd }}} t         j                  j                  | j                  j                  d      5  t        j                  | ||||      cd d d        S # 1 sw Y   y xY wr+   )r   r/   r0   r1   
layer_normhas_apexr   r   r
   r3   r   r4   r5   r6   )r$   r9   r&   r'   r(   r7   s         r   fast_layer_normr=   W   s     yy||A/sCC&q&$8H#NN188==)  .$$r(FIIbM$BR4772;X\46			u		E D||A/sCD D Ds   7DD#c                 ~   t        |      }| j                  d      }t        j                  j	                         r.|dk(  sJ t        j
                  |d      j                  d      }n2t        t        d| dz
  d            }t        j
                  ||d      }| t        j                  ||z         z  } || |z  } | S )N      dimTrC   keepdim)
lenpowr   r/   r0   mean	unsqueezetuplerangersqrtr$   r9   r&   r(   	norm_ndimvdimss          r   r	   r	   o   s     $%I	aAyy A~~JJqb!++B/U2	zA~r23JJqdD1	EKKC  AJHr   c                 T   t         j                  j                         rt        | |||      S t        r|t        | ||      S t        | |||      S t        | j                  j                        rAt        | j                  j                        }| j                  |      |j                  |      }} t         j                  j                  | j                  j                  d      5  t        rt        j                  | |||      } nt        | |||      } d d d        | S # 1 sw Y   | S xY wr+   )r   r/   r0   r	   has_apex_rmsnormr   r   r   r
   r3   r   r4   r5   r6   has_torch_rms_normr1   r$   r9   r&   r(   r7   s        r   fast_rms_normrU      s     yy+VS99>!!%5s;;(F4DcJJ188==)  .DDHfiim6			u		E ;

1.<A,fc:A	; H; Hs   %.DD'c                     t        |      dk(  sJ | j                  d      }t        j                  |dd      }| t        j                  ||z         z  } || |j                  dddd      z  } | S )Nr@   r?   TrD   rA   )rF   rG   r   rH   rL   reshape)r$   r9   r&   r(   rO   s        r   
rms_norm2drX      sr      A%%%	aA

1!T*A	EKKC  Aq"a++Hr   c                 h   t         j                  j                         rt        | |||      S t        rF| j                  dddd      } |t        | ||      } nt        | |||      } | j                  dddd      } t        | j                  j                        rAt        | j                  j                        }| j                  |      |j                  |      }} t         j                  j                  | j                  j                  d      5  t        | |||      } d d d        | S # 1 sw Y   | S xY w)Nr   r?      r@   Fr,   )r   r/   r0   rX   rR   permuter   r   r   r
   r3   r   r4   r5   r6   rT   s        r   fast_rms_norm2dr\      s    yy!-vs;;IIaAq!>q"2C8A%a1A3GAIIaAq!188==)  .DDHfiim6			u		E 9q*FC89 H9 Hs   D''D1c                 \   t        |      }t        j                  j                         r.|dk(  sJ t        j                  | d      j                  d      }n2t        t        d| dz
  d            }t        j                  | |d      }| t        j                  ||z         z  } || |z  } | S )Nr@   rA   rB   TrD   )	rF   r   r/   r0   varrI   rJ   rK   rL   rM   s          r   simple_normr_      s     $%Iyy A~~IIaR **2.U2	zA~r23IIaT40	EKKC  AJHr   c                    t         j                  j                         rt        | |||      S t	        | j
                  j                        rAt        | j
                  j                        }| j                  |      |j                  |      }} t         j                  j                  | j
                  j                  d      5  t        | |||      } d d d        | S # 1 sw Y   | S xY wr+   )r   r/   r0   r_   r   r
   r3   r   r4   r5   r6   rT   s        r   fast_simple_normra      s     yy1.<<188==)  .DDHfiim6			u		E :+VS9:H:Hs   CC%)r   )T)NNh㈵>)Nrb   )"__doc__typingr   r   r   torch.nnr   r1   #apex.normalization.fused_layer_normr   r<   ImportErrorr   r   rR   hasattrrS   r   strr   r   r    r#   Tensorintfloatr8   r=   r	   rU   rX   r\   r_   ra   r   r   r   <module>rm      s   "  $KHY
 Q
+  	2s 	2	/ 	/ &*#'>||>> U\\"> 5<<
 	>
 
> \\>0 &*#'D||D3iD U\\"D 5<<
 	D
 
D \\D6 &*	||3i U\\" 
	4 &*	||3i U\\" 
	
 \\D &*	||3i U\\" 
	$ &*	||3i U\\" 
	
 \\B &*	||3i U\\" 
	2 &*	||3i U\\" 
	
 \\o  H  s"   G+ 
G9 +G65G69HH