
    kh.                     2    d Z ddlZddlmZ  G d de      Zy)an   PyTorch LARS / LARC Optimizer

An implementation of LARS (SGD) + LARC in PyTorch

Based on:
  * PyTorch SGD: https://github.com/pytorch/pytorch/blob/1.7/torch/optim/sgd.py#L100
  * NVIDIA APEX LARC: https://github.com/NVIDIA/apex/blob/master/apex/parallel/LARC.py

Additional cleanup and modifications to properly support PyTorch XLA.

Copyright 2021 Ross Wightman
    N)	Optimizerc                   p     e Zd ZdZ	 	 	 	 	 	 	 	 	 d fd	Z fdZ ej                         dd       Z xZ	S )Larsa   LARS for PyTorch
    
    Paper: `Large batch training of Convolutional Networks` - https://arxiv.org/pdf/1708.03888.pdf

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining parameter groups.
        lr (float, optional): learning rate (default: 1.0).
        momentum (float, optional): momentum factor (default: 0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        dampening (float, optional): dampening for momentum (default: 0)
        nesterov (bool, optional): enables Nesterov momentum (default: False)
        trust_coeff (float): trust coefficient for computing adaptive lr / trust_ratio (default: 0.001)
        eps (float): eps for division denominator (default: 1e-8)
        trust_clip (bool): enable LARC trust ratio clipping (default: False)
        always_adapt (bool): always apply LARS LR adapt, otherwise only when group weight_decay != 0 (default: False)
    c                     |dk  rt        d|       |dk  rt        d|       |dk  rt        d|       |r|dk  s|dk7  rt        d      t        ||||||||	|
	      }t        |   ||       y )Ng        zInvalid learning rate: zInvalid momentum value: zInvalid weight_decay value: r   z8Nesterov momentum requires a momentum and zero dampening)	lrmomentum	dampeningweight_decaynesterovtrust_coeffeps
trust_clipalways_adapt)
ValueErrordictsuper__init__)selfparamsr   r   r	   r
   r   r   r   r   r   defaults	__class__s               K/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/optim/lars.pyr   zLars.__init__#   s     86rd;<<c>7zBCC#;L>JKKQ)q.WXX%#!%

 	*    c                 j    t         |   |       | j                  D ]  }|j                  dd        y )Nr   F)r   __setstate__param_groups
setdefault)r   stategroupr   s      r   r   zLars.__setstate__F   s5    U#&& 	0EZ/	0r   c           
         d}|$t        j                         5   |       }ddd       | j                  D ]  }|d   }|d   }|d   }|d   }|d   }|d   }	|d   D ]k  }
|
j                  |
j                  }|d	k7  s|d
   r|
j	                  d      }|j	                  d      }||z  |||z  z   |	z   z  }t        j
                  |d	kD  t        j
                  |d	kD  |d      d      }|d   rt        j                  ||d   z  d      }|j                  |
|       |j                  |       |d	k7  r~| j                  |
   }d|vr)t        j                  |      j                         x}|d<   n*|d   }|j                  |      j                  |d|z
         |r|j                  ||      }n|}|
j                  ||d           n  |S # 1 sw Y   xY w)zPerforms a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model and returns the loss.
        Nr
   r   r	   r   r   r   r   r   r   g       @      ?r   r   )max)alphamomentum_buffer)torchenable_gradr   gradnormwhereclampadd_mul_r   clonedetachadd)r   closurelossr   r
   r   r	   r   r   r   pr'   w_normg_normtrust_ratioparam_statebufs                    r   stepz	Lars.stepK   s    ""$ !y! && ,	1E 0LZ(Hk*IZ(H.K,C8_ $166>vv  1$n(=VVC[F!YYs^F"-"6&6LCX:X[^:^"_K #(++
FQJSA#K
 \*&+kk+d2KQT&UIIa|I4IIk* q="&**Q-K(;?D{{4?P?W?W?YYk*;<)*;<*//BN/K#xx8x<"tE$K<0I$1,	1\ c! !s   GG)	r!   r   r   r   FgMbP?g:0yE>FF)N)
__name__
__module____qualname____doc__r   r   r%   no_gradr8   __classcell__)r   s   @r   r   r      sL    ( !+F0
 U]]_9 9r   r   )r<   r%   torch.optim.optimizerr   r    r   r   <module>rA      s      +t9 tr   