
    kh<                     N    d Z ddlZ G d dej                  j                        Zy)z AdaHessian Optimizer

Lifted from https://github.com/davda54/ada-hessian/blob/master/ada_hessian.py
Originally licensed MIT, Copyright 2020, David Samuel
    Nc                        e Zd ZdZ	 	 	 	 	 	 	 	 d fd	Zed        Zd Zd Z e	j                         d        Z e	j                         d	d       Z xZS )

Adahessiana  
    Implements the AdaHessian algorithm from "ADAHESSIAN: An Adaptive Second OrderOptimizer for Machine Learning"

    Arguments:
        params (iterable): iterable of parameters to optimize or dicts defining parameter groups
        lr (float, optional): learning rate (default: 0.1)
        betas ((float, float), optional): coefficients used for computing running averages of gradient and the
            squared hessian trace (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve numerical stability (default: 1e-8)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0.0)
        hessian_power (float, optional): exponent of the hessian trace (default: 1.0)
        update_each (int, optional): compute the hessian trace approximation only after *this* number of steps
            (to save time) (default: 1)
        n_samples (int, optional): how many times to sample `z` for the approximation of the hessian trace (default: 1)
    c
                 L   d|k  st        d|       d|k  st        d|       d|d   cxk  rdk  sn t        d|d          d|d   cxk  rdk  sn t        d|d          d|cxk  rdk  sn t        d	|       || _        || _        |	| _        d
| _        t        j                         j                  | j                        | _        t        |||||      }
t        t        | 3  ||
       | j                         D ]  }d|_        d| j                  |   d<    y )N        zInvalid learning rate: zInvalid epsilon value: r         ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid Hessian power value: i)lrbetasepsweight_decayhessian_powerhessian step)
ValueError	n_samplesupdate_eachavg_conv_kernelseedtorch	Generatormanual_seed	generatordictsuperr   __init__
get_paramshessstate)selfparamsr	   r
   r   r   r   r   r   r   defaultsp	__class__s               Q/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/optim/adahessian.pyr   zAdahessian.__init__   sA    by6rd;<<cz6se<==eAh$$B58*MNNeAh$$B58*MNNm*s*<]OLMM"&. 	*66tyyA%'
 	j$(:" 	.AAF,-DJJqM.)	.    c                      y)NT r   s    r#   is_second_orderzAdahessian.is_second_orderF   s    r$   c                 (    d | j                   D        S )zH
        Gets all parameters in all param_groups with gradients
        c              3   N   K   | ]  }|d    D ]  }|j                   s|   yw)r   N)requires_grad).0groupr!   s      r#   	<genexpr>z(Adahessian.get_params.<locals>.<genexpr>O   s&     ]e%/]QQ__]]s   %	%)param_groupsr'   s    r#   r   zAdahessian.get_paramsJ   s    
 ^t00]]r$   c                     | j                         D ]Z  }t        |j                  t              r| j                  |   d   | j
                  z  dk(  sA|j                  j                          \ y)z;
        Zeros out the accumulated hessian traces.
        r   r   N)r   
isinstancer   floatr   r   zero_)r   r!   s     r#   zero_hessianzAdahessian.zero_hessianQ   sV    
 " 	Aaffe,A~1NQUQaQa1aef1f	r$   c                    g }t        d | j                               D ]O  }| j                  |   d   | j                  z  dk(  r|j	                  |       | j                  |   dxx   dz  cc<   Q t        |      dk(  ry| j                  j                  |d   j                  k7  r@t        j                  |d   j                        j                  | j                        | _        |D cg c]  }|j                   }}t        | j                        D ]  }|D cg c]D  }t        j                  dd|j!                         | j                  |j                        dz  d	z
  F }}t        j"                  j                  |||d
|| j                  dz
  k        }t%        |||      D ]+  \  }}}|xj&                  ||z  | j                  z  z  c_        -  yc c}w c c}w )z}
        Computes the Hutchinson approximation of the hessian trace and accumulates it for each trainable parameter.
        c                     | j                   d uS N)grad)r!   s    r#   <lambda>z(Adahessian.set_hessian.<locals>.<lambda>a   s    !&&"4 r$   r   r   r   N   )r   deviceg       @r   T)grad_outputsonly_inputsretain_graph)filterr   r   r   appendlenr   r;   r   r   r   r   r8   ranger   randintsizeautogradzipr   )	r   r   r!   gradsizsh_zsh_zzs	            r#   set_hessianzAdahessian.set_hessianZ   s    4doo6GH 	/Azz!}^,t/?/??1Da JJqM.)Q.)	/
 v;!>>  F1I$4$44"__VAY-=-=>JJ499UDN!'(A((t~~& 	3Apvwkl%--1affh$..QRQYQYZ]``cffwBw>>&&vBDqSWSaSadeSeOe ' gD r62 3	Q#'DNN223	3 ) xs   )GA	Gc                 v   d}| |       }| j                          | j                          | j                  D ]  }|d   D ]  }|j                  |j                  | j
                  rq|j                         dk(  r^t        j                  |j                        j                  ddgd      j                  |j                        j                         |_        |j                  d|d	   |d
   z  z
         | j                  |   }t        |      dk(  r5d|d<   t        j                  |      |d<   t        j                  |      |d<   |d   |d   }}|d   \  }}	|dxx   dz  cc<   |j                  |      j!                  |j                  d|z
         |j                  |	      j#                  |j                  |j                  d|	z
         d||d   z  z
  }
d|	|d   z  z
  }|d   }||z  j%                  |dz        j!                  |d         }|d	   |
z  }|j'                  |||           |S )z
        Performs a single optimization step.
        Arguments:
            closure (callable, optional) -- a closure that reevaluates the model and returns the loss (default: None)
        Nr      r:      T)dimkeepdimr   r	   r   r   stepexp_avgexp_hessian_diag_sqr
   )alpha)valuer   r   )r4   rM   r/   r8   r   r   rQ   r   absmean	expand_asclonemul_r   rA   
zeros_likeadd_addcmul_pow_addcdiv_)r   closurelossr-   r!   r   rT   rU   beta1beta2bias_correction1bias_correction2kdenom	step_sizes                  r#   rS   zAdahessian.stepv   s@    9D&& %	=E8_ $=66>QVV^''AEEGqL"YYqvv.33A3MWWXYX^X^_eegAF q5;~)>>>?

1 u:?$%E&M','7'7':E)$383C3CA3FE/0/4Y/?G\A],$W~uf" U#((q5y(A#((/88qSXy8Y#$uf'=#= #$uf'=#= /*,/??EEa!eLQQRWX]R^_ "$K*::	

7E)
<I$=%	=N r$   )g?)g?g+?g:0yE>r   r   r   r   Fr7   )__name__
__module____qualname____doc__r   propertyr(   r   r4   r   no_gradrM   rS   __classcell__)r"   s   @r#   r   r   	   s    & !*.X  ^ U]]_3 36 U]]_5 5r$   r   )rn   r   optim	Optimizerr   r&   r$   r#   <module>rt      s'   
 c&& cr$   