
    kh@                         d dl Z d dlZd dlZd dl mZ d dlmZ d ZddZddZddZ	d Z
	 	 	 dd	ej                  d
edededef
dZ	 	 dd	ej                  d
ededefdZy)    N)nn)_calculate_fan_in_and_fan_outc                    d }||d|z  z
  k  s||d|z  z   kD  rt        j                  dd        |||z
  |z        } |||z
  |z        }| j                  d|z  dz
  d|z  dz
         | j                          | j	                  |t        j                  d      z         | j                  |       | j                  ||       | S )Nc                 d    dt        j                  | t        j                  d      z        z   dz  S )N      ?       @)matherfsqrt)xs    S/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/layers/weight_init.pynorm_cdfz _trunc_normal_.<locals>.norm_cdf   s(    TXXa$))B-/00B66       zjmean is more than 2 std from [a, b] in nn.init.trunc_normal_. The distribution of values may be incorrect.)
stacklevel   r   )minmax)	warningswarnuniform_erfinv_mul_r	   r   add_clamp_)tensormeanstdabr   lus           r   _trunc_normal_r#      s    7 	q1s7{q1s7{ 2 E!"	$ 	!d(c!"A!d(c!"A OOAEAIq1uqy) NN KKdiim#$
KK MMaQMMr   c                 t    t        j                         5  t        | ||||      cddd       S # 1 sw Y   yxY w)a  Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    NOTE: this impl is similar to the PyTorch trunc_normal_, the bounds [a, b] are
    applied while sampling the normal with mean/std applied, therefore a, b args
    should be adjusted to match the range of mean, std args.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    N)torchno_gradr#   r   r   r   r   r    s        r   trunc_normal_r(   +   s2    . 
 7fdCA67 7 7s   .7c                     t        j                         5  t        | dd||       | j                  |      j	                  |       ddd       | S # 1 sw Y   | S xY w)a  Fills the input Tensor with values drawn from a truncated
    normal distribution. The values are effectively drawn from the
    normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
    with values outside :math:`[a, b]` redrawn until they are within
    the bounds. The method used for generating the random values works
    best when :math:`a \leq \text{mean} \leq b`.

    NOTE: this 'tf' variant behaves closer to Tensorflow / JAX impl where the
    bounds [a, b] are applied when sampling the normal distribution with mean=0, std=1.0
    and the result is subsequently scaled and shifted by the mean and std args.

    Args:
        tensor: an n-dimensional `torch.Tensor`
        mean: the mean of the normal distribution
        std: the standard deviation of the normal distribution
        a: the minimum cutoff value
        b: the maximum cutoff value
    Examples:
        >>> w = torch.empty(3, 5)
        >>> nn.init.trunc_normal_(w)
    r   r   N)r%   r&   r#   r   r   r'   s        r   trunc_normal_tf_r*   F   sR    . 
 $vq#q!,Cd#$ M$ Ms   0AAc                 ,   t        |       \  }}|dk(  r|}n|dk(  r|}n|dk(  r||z   dz  }|z  }|dk(  r$t        | t        j                  |      dz         y |dk(  rCt	        j
                         5  | j                  t        j                  |             d d d        y |d	k(  rIt        j                  d
|z        }t	        j
                         5  | j                  | |       d d d        y t        d|       # 1 sw Y   y xY w# 1 sw Y   y xY w)Nfan_infan_outfan_avgr   truncated_normalg۶%?r   normaluniform   zinvalid distribution )	r   r*   r	   r   r%   r&   normal_r   
ValueError)	r   scalemodedistributionr,   r-   denomvariancebounds	            r   variance_scaling_r<   c   s   3F;OFGx				'!Q&u}H))TYYx%8;M%MN		!]]_ 	4NNtyy2N3	4 	4		"		!h,']]_ 	+OOUFE*	+ 	+ 0?@@	4 	4	+ 	+s   3&C>D
>D
Dc                      t        | dd       y )Nr,   r/   )r7   r8   )r<   )r   s    r   lecun_normal_r>   |   s    f8:LMr   modulename	init_bias	head_biasclassifier_namec                    t        | t        j                  t        j                  t        j                  t        j
                  f      r|j                  |      rTt        j                  j                  | j                         t        j                  j                  | j                  |       y t        j                  j                  | j                  d       t        | t        j                        r8| j                  +t        j                  j                  | j                  |       y y y t        | d      r| j                          y y )N{Gz?r0   init_weights)
isinstancer   LinearConv1dConv2dConv3d
startswithinitzeros_weight	constant_biasr(   hasattrrF   )r?   r@   rA   rB   rC   s        r   init_weight_vitrS      s     &299biiBIIFG???+GGNN6==)GGfkk95GG!!&--T!:&")),1H!!&++y9 2I,		( 
)r   c                 &   t        | t        j                        r|j                  |      rTt        j                  j                  | j                         t        j                  j                  | j                  |       y t        j                  j                  | j                         | j                  Yd|v r+t        j                  j                  | j                  d      n(t        j                  j                  | j                         y y t        | t        j                        rLt        | j                         | j                  *t        j                  j                  | j                         y y t        | d      r| j                          y y )Nmlpgư>r0   rF   )rG   r   rH   rL   rM   rN   rO   rP   rQ   xavier_uniform_r4   rJ   r>   rR   rF   )r?   r@   rB   rC   s       r   init_weight_jaxrW      s     &"))$???+GGNN6==)GGfkk95GG##FMM2{{&:?4-6RWW^^\b\g\gMh '	FBII	&fmm$;;"GGNN6;;' #		( 
)r   )        r   g       r   )r   r,   r1   )rE   rX   head)rX   rY   )r%   r	   r   r   torch.nn.initr   r#   r(   r*   r<   r>   ModulestrfloatrS   rW    r   r   <module>r_      s        7 F76:A2N  %		  	
 , %			  	r   