
    kh-              "       P   d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	  G d de
      Z G d d	e	      Zd
ee   dee   dee   dee   dee   dee   dededededededededededef"dZd
ee   dee   dee   dee   dee   dee   dededededededededededef"dZy)z Adan Optimizer

Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models[J]. arXiv preprint arXiv:2208.06677, 2022.
    https://arxiv.org/abs/2208.06677

Implementation adapted from https://github.com/sail-sg/Adan
    N)ListOptionalTuple)Tensor)	Optimizerc                        e Zd ZdZdZd Zd Zy)MultiTensorApplyFc                     	 dt         _        || _        y # t        $ r }dt         _        |t         _        Y d }~y d }~ww xY w)NTF)r	   	available
chunk_sizeImportError
import_err)selfr   errs      K/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/optim/adan.py__init__zMultiTensorApply.__init__"   s<    	.)-&(DO 	.).&*-''	.s    	>9>c                 ,     || j                   ||g| S N)r   )r   opnoop_flag_buffertensor_listsargss        r   __call__zMultiTensorApply.__call__*   s    $//#3\IDII    N)__name__
__module____qualname__r   warnedr   r    r   r   r	   r	      s    IF.Jr   r	   c                        e Zd ZdZ	 	 	 	 	 	 	 ddedeeeef   dededededee   f fd	Z fd
Z	 e
j                         d        Z e
j                         dd       Z xZS )Adana   Implements a pytorch variant of Adan.

    Adan was proposed in Adan: Adaptive Nesterov Momentum Algorithm for Faster Optimizing Deep Models
    https://arxiv.org/abs/2208.06677

    Arguments:
        params: Iterable of parameters to optimize or dicts defining parameter groups.
        lr: Learning rate.
        betas: Coefficients used for first- and second-order moments.
        eps: Term added to the denominator to improve numerical stability.
        weight_decay: Decoupled weight decay (L2 penalty)
        no_prox: How to perform the weight decay
        caution: Enable caution from 'Cautious Optimizers'
        foreach: If True would use torch._foreach implementation. Faster but uses slightly more memory.
    lrbetasepsweight_decayno_proxcautionforeachc	           	         d|k  st        dj                  |            d|k  st        dj                  |            d|d   cxk  rdk  sn t        dj                  |d               d|d   cxk  rdk  sn t        dj                  |d               d|d	   cxk  rdk  sn t        d
j                  |d	               t        |||||||      }	t        
|   ||	       y )N        zInvalid learning rate: {}zInvalid epsilon value: {}r         ?z%Invalid beta parameter at index 0: {}   z%Invalid beta parameter at index 1: {}   z%Invalid beta parameter at index 2: {})r"   r#   r$   r%   r&   r'   r(   )
ValueErrorformatdictsuperr   )r   paramsr"   r#   r$   r%   r&   r'   r(   defaults	__class__s             r   r   zAdan.__init__?   s     by8??CDDcz8??DEEeAh$$DKKERSHUVVeAh$$DKKERSHUVVeAh$$DKKERSHUVV%
 	*r   c                     t         t        |   |       | j                  D ]&  }|j	                  dd       |j	                  dd       ( y )Nr&   Fr'   )r1   r!   __setstate__param_groups
setdefault)r   stategroupr4   s      r   r6   zAdan.__setstate___   sG    dD&u-&& 	/EY.Y.	/r   c                    | j                   D ]u  }d|d<   |d   D ]f  }|j                  s| j                  |   }t        j                  |      |d<   t        j                  |      |d<   t        j                  |      |d<   h w y )Nr   stepr2   exp_avg
exp_avg_sqexp_avg_diff)r7   requires_gradr9   torch
zeros_like)r   r:   pr9   s       r   restart_optzAdan.restart_opte   s    && 	@EE&M8_ 
@?? JJqME (-'7'7':E)$*/*:*:1*=E,',1,<,<Q,?E.)
@	@r   c                    d}|$t        j                         5   |       }ddd       	 dt         j                  j                  j                  j                         v }| j                  D ]  }g }g }g }g }g }	g }
|d   \  }}}d|v r|dxx   dz  cc<   nd|d<   d||d   z  z
  }d||d   z  z
  }d||d   z  z
  }|d   D ]  }|j                  |j                  |       |j                  |j                         | j                  |   }t        |      d	k(  rHt        j                  |      |d
<   t        j                  |      |d<   t        j                  |      |d<   d|vs|d   dk(  r|j                  j                          |d<   |j                  |d
          |j                  |d          |	j                  |d          |
j                  |d           |sy|d   |d    xs |}n|d   }|rt        }nt        } |||f|||	|
|||||t        j                   |      |d   |d   |d   |d   |d   d  |S # 1 sw Y   'xY w#  d}Y xY w)z$Performs a single optimization step.NScalarFr#   r<   r,   r+   r2   r   r=   r>   r?   neg_pre_gradr(   r'   r"   r%   r$   r&   )exp_avgsexp_avg_sqsexp_avg_diffsneg_pre_gradsbeta1beta2beta3bias_correction1bias_correction2bias_correction3_sqrtr"   r%   r$   r&   r'   )rA   enable_gradopsaten_foreach_maximum_	overloadsr7   gradappendr9   lenrB   clone_multi_tensor_adan_single_tensor_adanmathsqrt)r   closurelosshas_scalar_maximumr:   params_with_gradgradsrH   rI   rJ   rK   rL   rM   rN   rO   rP   bias_correction3rC   r9   use_foreachfuncs                        r   r<   z	Adan.stepu   s    ""$ !y!	'!)UYY^^-M-M-W-W-Y!Y && G	E!EHKMM"'.E5% f" !f"UeFm%;;"UeFm%;;"UeFm%;;8_ <66> ''*QVV$

1u:?','7'7':E)$*/*:*:1*=E,',1,<,<Q,?E.)!.%-12D-.VV\\^OE.)i 01""5#67$$U>%:;$$U>%:;%<( $Y'"'	"22H6H#I.)*  "'++!1!1&*ii0@&A;">2%Li(i(#kG	R c! !
	'!&s   I4I II)MbP?)g\(\?gq=
ףp?gGz?g:0yE>r*   FFNr   )r   r   r   __doc__floatr   boolr   r   r6   rA   no_gradrD   r<   __classcell__)r4   s   @r   r!   r!   .   s    $ 0B"%!!&*++ u,-+ 	+
  + + + d^+@/ U]]_@ @ U]]_U Ur   r!   r2   rc   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   r"   r%   r$   r&   r'   c                   t        |       D ]  \  }}||   }||   }||   }||   }||   }|j                  |       |j                  |      j                  |d|z
         |j                  |      j                  |d|z
         |j                  |      j                  |       |j                  |      j                  ||d|z
         |j	                         |z  j                  |      }||z  |
z  }||	z  }|rU||z  dkD  j                  |j                        }|j                  |j                         j                  d             ||z  }|rB|j                  d||z  z
         |j                  |||        |j                  |||        nA|j                  |||        |j                  |||        |j                  d||z  z          |j                         j                  |d        y )Nr,   alphavaluer   rg   )min      )	enumerateadd_mul_addcmul_r^   todtypediv_meanclamp_addcdiv_zero_)r2   rc   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   r"   r%   r$   r&   r'   iparamrW   r=   r>   r?   neg_grad_or_diffdenomstep_size_diff	step_sizemasks                               r   r\   r\      s   ( f% #85Qx1+ ^
$Q'(+ 	d#U  QY 7% %%&6a%i%He$))$/''(8:JRSV[R['\"%::@@Ee&66))	dNQ&**4::6DIIdiik((T(23nGJJq2,,-NN7E)N<NN<~oNFNN7E)N<NN<~oNFJJq2,,- %%d$%7G#8r   c                   t        |       dk(  ry t        j                  ||       t        j                  ||       t        j                  ||d|z
         t        j                  ||       t        j                  ||d|z
         t        j                  ||       t        j                  ||       t        j                  ||       t        j                  |||d|z
         t        j
                  |      }t        j                  ||       t        j                  ||       ||z  |
z  }||	z  }|rt        j                  ||      }t        ||      D cg c]#  \  }}|dkD  j                  |j                        % }}}|D cg c]  }|j                          }}t        j                  |d       t        j                  ||       t        j                  ||      }|rQt        j                  | d||z  z
         t        j                  | |||        t        j                  | |||        nPt        j                  | |||        t        j                  | |||        t        j                  | d||z  z          t        j                  |       t        j                  ||d       y c c}}w c c}w )Nr   r,   rn   rp   rg   rs   )rY   rA   _foreach_add__foreach_mul__foreach_addcmul__foreach_sqrt_foreach_div__foreach_mulziprx   ry   r{   rU   _foreach_addcdiv__foreach_zero_)r2   rc   rH   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   r"   r%   r$   r&   r'   r   r   r   masksmg
mask_scales                           r   r[   r[     sL   ( 6{a 
u-	%(	%q5y9	u-	}AIF	u-	u-	U+	KQQVYW,E	45	s#%Z"22N%%I""8U314UE1BCA!a%AGG$CC(-.1affh.
.
D1E:.%%h6FA\(9$9:%	zJu^OT%	zJu^OTFA\(9$9:	'	uD9! D.s   (J6
J<)rh   r]   typingr   r   r   rA   r   torch.optim.optimizerr   objectr	   r!   ri   rj   r\   r[   r   r   r   <module>r      s  *  ( (   +Jv J ]9 ]@78V78F|78 v,78 &\	78
 F|78 F|78 78 78 78  78  78  %78 78 78  !78" #78$ %78t?:V?:F|?: v,?: &\	?:
 F|?: F|?: ?: ?: ?:  ?:  ?:  %?: ?: ?:  !?:" #?:$ %?:r   