
""" RAdam Optimizer.
Implementation lifted from: https://github.com/LiyuanLucasLiu/RAdam
Paper: `On the Variance of the Adaptive Learning Rate and Beyond` - https://arxiv.org/abs/1908.03265

NOTE: This impl has been deprecated in favour of torch.optim.RAdam and remains as a reference
"""
import math

import torch
from torch.optim.optimizer import Optimizer


class RAdamLegacy(Optimizer):
    """ PyTorch RAdam optimizer

    NOTE: This impl has been deprecated in favour of torch.optim.RAdam and remains as a reference
    """
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(
            lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
            # per-group cache of (step, num_sma, step_size) entries, keyed on step % 10
            buffer=[[None, None, None] for _ in range(10)])
        super(RAdamLegacy, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(RAdamLegacy, self).__setstate__(state)

    @torch.no_grad()
    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.float()
                if grad.is_sparse:
                    raise RuntimeError('RAdam does not support sparse gradients')

                p_fp32 = p.float()
                state = self.state[p]

                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p_fp32)
                    state['exp_avg_sq'] = torch.zeros_like(p_fp32)
                else:
                    state['exp_avg'] = state['exp_avg'].type_as(p_fp32)
                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_fp32)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                # update biased first and second moment running averages
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                state['step'] += 1
                # num_sma and step_size depend only on the step count, so cache them
                buffered = group['buffer'][int(state['step'] % 10)]
                if state['step'] == buffered[0]:
                    num_sma, step_size = buffered[1], buffered[2]
                else:
                    buffered[0] = state['step']
                    beta2_t = beta2 ** state['step']
                    # length of the approximated SMA (rho_t in the paper)
                    num_sma_max = 2 / (1 - beta2) - 1
                    num_sma = num_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
                    buffered[1] = num_sma

                    # more conservative since it's an approximated value
                    if num_sma >= 5:
                        step_size = group['lr'] * math.sqrt(
                            (1 - beta2_t) *
                            (num_sma - 4) / (num_sma_max - 4) *
                            (num_sma - 2) / num_sma *
                            num_sma_max / (num_sma_max - 2)) / (1 - beta1 ** state['step'])
                    else:
                        step_size = group['lr'] / (1 - beta1 ** state['step'])
                    buffered[2] = step_size

                if group['weight_decay'] != 0:
                    p_fp32.add_(p_fp32, alpha=-group['weight_decay'] * group['lr'])

                if num_sma >= 5:
                    # variance of the adaptive lr is tractable: take a rectified adaptive step
                    denom = exp_avg_sq.sqrt().add_(group['eps'])
                    p_fp32.addcdiv_(exp_avg, denom, value=-step_size)
                else:
                    # early steps: fall back to an SGD-with-momentum style update
                    p_fp32.add_(exp_avg, alpha=-step_size)

                p.copy_(p_fp32)

        return loss
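

# A minimal usage sketch (not part of the original module): RAdamLegacy follows the
# standard torch.optim.Optimizer interface, so it drops in wherever Adam would be used.
# The tiny model and random data below are hypothetical stand-ins for illustration.
if __name__ == '__main__':
    model = torch.nn.Linear(10, 1)
    optimizer = RAdamLegacy(model.parameters(), lr=1e-3, betas=(0.9, 0.999), weight_decay=1e-2)
    x, y = torch.randn(8, 10), torch.randn(8, 1)
    for _ in range(5):
        optimizer.zero_grad()
        loss = torch.nn.functional.mse_loss(model(x), y)
        loss.backward()
        optimizer.step()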