
    kh                     6    d dl Z d dlZd dlmZ  G d de      Zy)    N)	Optimizerc                   ^     e Zd ZdZ	 	 	 	 	 d fd	Z ej                         dd       Z xZS )NAdamLegacyaa  Implements Nadam algorithm (a variant of Adam based on Nesterov momentum).

    NOTE: This impl has been deprecated in favour of torch.optim.NAdam and remains as a reference

    It has been proposed in `Incorporating Nesterov Momentum into Adam`__.

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 2e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        schedule_decay (float, optional): momentum schedule decay (default: 4e-3)

    __ http://cs229.stanford.edu/proj2015/054_report.pdf
    __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf

        Originally taken from: https://github.com/pytorch/pytorch/pull/1408
        NOTE: Has potential issues but does work well on some problems.
    c                     d|k  st        dj                  |            t        |||||      }t        t        |   ||       y )Ng        zInvalid learning rate: {})lrbetasepsweight_decayschedule_decay)
ValueErrorformatdictsuperr   __init__)	selfparamsr   r   r	   r
   r   defaults	__class__s	           L/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/optim/nadam.pyr   zNAdamLegacy.__init__    sO     by8??CDD%)
 	k4)&(;    c           	      8   d}|$t        j                         5   |       }ddd       | j                  D ]  }|d   D ]  }|j                  |j                  }| j                  |   }t        |      dk(  r:d|d<   d|d<   t        j                  |      |d<   t        j                  |      |d<   |d   }|d	   }|d   |d   }
}	|d
   \  }}|d   }|dxx   dz  cc<   |d   }d||z  z
  }|d   dk7  r|j                  ||d         }|ddd||z  z  z  z
  z  }|ddd|dz   |z  z  z  z
  z  }||z  }||z  |z  }||d<   |	j                  |      j                  |d|z
         |
j                  |      j                  ||d|z
         |
j                         t        j                  |      z  j                  |      }|j                  |||d    d|z
  z  d|z
  z         |j                  |	||d    |z  d|z
  z           |S # 1 sw Y   xY w)zPerforms a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr   r   stepg      ?
m_scheduleexp_avg
exp_avg_sqr   r   r	      r
   )alphag      ?gQ?)valuer   )torchenable_gradparam_groupsgradstatelen
zeros_likeaddmul_add_addcmul_sqrtmathaddcdiv_)r   closurelossgrouppr"   r#   r   r   r   r   beta1beta2r	   tbias_correction2momentum_cache_tmomentum_cache_t_1m_schedule_newm_schedule_nextdenoms                        r   r   zNAdamLegacy.step4   s    ""$ !y! && '	mE8_ &m66>vv

1 u:?$%E&M*,E,'','7'7':E)$*/*:*:1*=E,' #<0
!&'7!8&+I&6l8K$W~uElf"&M#$uz> (A-88AU>-B8CD#(B^AS8T1U,U#V %*b3$AE^C[:\3].]%^"!+.>!>",/?"?BT"T&4l# U#((R%Z(@&//d"u*/M#*TYY7G-HHNNsS

4uT{lbCS>S.TXZ]kXk.l
m

7E%+@R1RVX[jVj1k
lM&m'	mR Y! !s   HH)gMb`?)g?g+?g:0yE>r   gMbp?)N)	__name__
__module____qualname____doc__r   r   no_gradr   __classcell__)r   s   @r   r   r      s:    6 <( U]]_5 5r   r   )r+   r   torch.optim.optimizerr   r    r   r   <module>rB      s      +c) cr   