
    kh                         d Z ddlZddlmc mZ ddlmZ ddlZdej                  fdZ
dej                  fdZdeded	efd
Z G d de      Zy)a3  
AdamP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/adamp.py

Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
Code: https://github.com/clovaai/AdamP

Copyright (c) 2020-present NAVER Corp.
MIT license
    N)	Optimizerreturnc                 D    | j                  | j                  d      d      S )Nr   )reshapesizexs    L/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/optim/adamp.py_channel_viewr      s    99QVVAY##    c                 &    | j                  dd      S )N   r   )r   r	   s    r   _layer_viewr      s    99Qr   deltawd_ratioepsc                 "   d}ddt        | j                        dz
  z  z   }t        t        fD ]  } ||       }	 ||      }
t	        j
                  |
|	d|      j                         }|j                         |t        j                  |	j                  d            z  k  ss| |	j                  dd      j                  |      j                  |      z  }|| |||z        j                  d      j                  |      z  z  }|}||fc S  ||fS )	N      ?)r   )r   r   )dimr      )pr   )r   )lenshaper   r   Fcosine_similarityabs_maxmathsqrtr   normadd_r   sum)r   gradperturbr   r   r   wdexpand_size	view_func
param_view	grad_view
cosine_simp_ns                r   
projectionr-      s	   	B$#agg,"233K#[1 
	q\
dO	((JA3OTTV
 >>edii
0B&CCCjooqo166s;CCKPPCsYsW}599a9@HHUUUGBB;
 B;r   c                   ^     e Zd Z	 	 	 	 	 	 	 d fd	Z ej
                         dd       Z xZS )AdamPc	           	      R    t        |||||||      }	t        t        |   ||	       y )N)lrbetasr   weight_decayr   r   nesterov)dictsuperr/   __init__)selfparamsr1   r2   r   r3   r   r   r4   defaults	__class__s             r   r7   zAdamP.__init__,   s7     %
 	eT#FH5r   c                 $   d }|$t        j                         5   |       }d d d        | j                  D ]  }|d   D ]  }|j                  |j                  }|d   \  }}|d   }| j                  |   }	t        |	      dk(  r5d|	d<   t        j                  |      |	d<   t        j                  |      |	d<   |	d   |	d   }}
|	dxx   dz  cc<   d||	d   z  z
  }d||	d   z  z
  }|
j                  |      j                  |d|z
  	       |j                  |      j                  ||d|z
  
       |j                         t        j                  |      z  j                  |d         }|d   |z  }|r||
z  d|z
  |z  z   |z  }n|
|z  }d}t        |j                        dkD  rt        ||||d   |d   |d         \  }}|d   dkD  r |j                  d|d   |d   z  |z  z
         |j                  || 	         |S # 1 sw Y   xY w)Nr9   r2   r4   r   stepexp_avg
exp_avg_sqr   )alpha)valuer   r1   r   r   r   r3   )torchenable_gradparam_groupsr$   stater   
zeros_likemul_r"   addcmul_r    r   r   r-   )r8   closurelossgroupr   r$   beta1beta2r4   rE   r>   r?   bias_correction1bias_correction2denom	step_sizer%   r   s                     r   r=   z
AdamP.stepB   sZ   ""$ !y! && -	2E8_ ,266>vv$W~u ,

1 u:?$%E&M','7'7':E)$*/*:*:1*=E,' ',I&6l8Kf"#$uf'=#= #$uf'=#= U#((QY(?&//d!e)/L#*TYY7G-HHNNuUZ|\!$K*::	$w!e)t1CCuLG%oG qww<!#(21dGU7^UZ[eUfhmnsht(u%GX (1,FF2deN.C Ch NNO wyj1Y,2-	2^ e! !s   HH)gMbP?)g?g+?g:0yE>r   皙?rR   F)N)__name__
__module____qualname__r7   rB   no_gradr=   __classcell__)r;   s   @r   r/   r/   +   s;     6, U]]_5 5r   r/   )__doc__rB   torch.nn.functionalnn
functionalr   torch.optim.optimizerr   r   Tensorr   r   floatr-   r/    r   r   <module>r`      sc       + $ $ell   U $MI Mr   