
""" Nvidia NovoGrad Optimizer.
Original impl by Nvidia from Jasper example:
    - https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/SpeechRecognition/Jasper
Paper: `Stochastic Gradient Methods with Layer-wise Adaptive Moments for Training of Deep Networks`
    - https://arxiv.org/abs/1905.11286
"""
import torch
from torch.optim.optimizer import Optimizer

NvNovoGrada(  
    Implements Novograd algorithm.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.95, 0.98))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        grad_averaging: gradient averaging
        amsgrad (boolean, optional): whether to use the AMSGrad variant of this
            algorithm from the paper `On the Convergence of Adam and Beyond`_
            (default: False)

    def __init__(self, params, lr=1e-3, betas=(0.95, 0.98), eps=1e-8,
                 weight_decay=0, grad_averaging=False, amsgrad=False):
        if not 0.0 <= lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 <= eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
        defaults = dict(lr=lr, betas=betas, eps=eps,
                        weight_decay=weight_decay,
                        grad_averaging=grad_averaging,
                        amsgrad=amsgrad)
        super(NvNovoGrad, self).__init__(params, defaults)

    def __setstate__(self, state):
        super(NvNovoGrad, self).__setstate__(state)
        for group in self.param_groups:
            group.setdefault('amsgrad', False)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('Sparse gradients are not supported.')
                amsgrad = group['amsgrad']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(p)
                    # NovoGrad keeps a single scalar second moment per tensor
                    state['exp_avg_sq'] = torch.zeros([]).to(state['exp_avg'].device)
                    if amsgrad:
                        # Maintains max of all exp. moving avg. of sq. grad. values
                        state['max_exp_avg_sq'] = torch.zeros([]).to(state['exp_avg'].device)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                if amsgrad:
                    max_exp_avg_sq = state['max_exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Layer-wise statistic: squared L2 norm of the full gradient tensor
                norm = torch.sum(torch.pow(grad, 2))

                if exp_avg_sq == 0:
                    # First step: seed the second moment with the raw norm
                    exp_avg_sq.copy_(norm)
                else:
                    exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)

                if amsgrad:
                    # Maintain the maximum of all second moment running averages so far
                    torch.max(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
                    # Use the max for normalizing the running average of the gradient
                    denom = max_exp_avg_sq.sqrt().add_(group['eps'])
                else:
                    denom = exp_avg_sq.sqrt().add_(group['eps'])

                # Normalize the gradient by the scalar second moment, then apply
                # weight decay and optional gradient averaging before the momentum update
                grad.div_(denom)
                if group['weight_decay'] != 0:
                    grad.add_(p, alpha=group['weight_decay'])
                if group['grad_averaging']:
                    grad.mul_(1 - beta1)
                exp_avg.mul_(beta1).add_(grad)

                p.add_(exp_avg, alpha=-group['lr'])

        return loss