
    kh                         d Z ddlZddlmZ ddlmc mZ ddlmZm	Z	m
Z
  G d dej                        Z G d dej                        Z G d	 d
ej                        Z G d dej                        Zy)a   Convolution with Weight Standardization (StdConv and ScaledStdConv)

StdConv:
@article{weightstandardization,
  author    = {Siyuan Qiao and Huiyu Wang and Chenxi Liu and Wei Shen and Alan Yuille},
  title     = {Weight Standardization},
  journal   = {arXiv preprint arXiv:1903.10520},
  year      = {2019},
}
Code: https://github.com/joe-siyuan-qiao/WeightStandardization

ScaledStdConv:
Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets`
    - https://arxiv.org/abs/2101.08692
Official Deepmind JAX code: https://github.com/deepmind/deepmind-research/tree/master/nfnets

Hacked together by / copyright Ross Wightman, 2021.
    N   )get_paddingget_padding_valuepad_samec                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )	StdConv2dzConv2d with Weight Standardization. Used for BiT ResNet-V2 models.

    Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
        https://arxiv.org/abs/1903.10520v2
    c
           
      `    |t        |||      }t        
| 	  ||||||||       |	| _        y )Nstridepaddingdilationgroupsbias)r   super__init__eps)self
in_channelout_channelskernel_sizer   r   r   r   r   r   	__class__s             P/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/layers/std_conv.pyr   zStdConv2d.__init__    sF     ?!+vx@Gk&hvD 	 	J     c           	      f   t        j                  | j                  j                  d| j                  d      d d dd| j
                        j                  | j                        }t        j                  ||| j                  | j                  | j                  | j                  | j                        }|S Nr   T        )trainingmomentumr   )F
batch_normweightreshaper   r   
reshape_asconv2dr   r   r   r   r   r   xr"   s      r   forwardzStdConv2d.forward*   s    KK4#4#4b94BDHH66@j6M 	 HHQ		4;;dmmUYU`U`ar   )r   Nr   r   Fư>__name__
__module____qualname____doc__r   r(   __classcell__r   s   @r   r   r      s     LP26r   r   c                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )StdConv2dSamezConv2d with Weight Standardization. TF compatible SAME padding. Used for ViT Hybrid model.

    Paper: `Micro-Batch Training with Batch-Channel Normalization and Weight Standardization` -
        https://arxiv.org/abs/1903.10520v2
    c
           
      t    t        ||||      \  }}
t        | 	  ||||||||       |
| _        |	| _        y )Nr   r   r
   )r   r   r   same_padr   )r   r   r   r   r   r   r   r   r   r   
is_dynamicr   s              r   r   zStdConv2dSame.__init__8   sP     0V^fgk&'\d 	 	& #r   c           	         | j                   r,t        || j                  | j                  | j                        }t        j                  | j                  j                  d| j                  d      d d dd| j                        j                  | j                        }t        j                  ||| j                  | j                  | j                  | j                  | j                        }|S r   )r5   r   r   r   r   r    r!   r"   r#   r   r   r$   r%   r   r   r   r&   s      r   r(   zStdConv2dSame.forwardB   s    ==D,,dkk4==IAKK4#4#4b94BDHH66@j6M 	 HHQ		4;;dmmUYU`U`ar   )r   SAMEr   r   Fr)   r*   r0   s   @r   r2   r2   2   s     LR26r   r2   c                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )ScaledStdConv2da4  Conv2d layer with Scaled Weight Standardization.

    Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
        https://arxiv.org/abs/2101.08692

    NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
    c           
      (   |t        |||      }t        | 	  ||||||||       t        j                  t        j                  | j                  dddf|            | _        |	| j                  d   j                         dz  z  | _        |
| _        y )Nr
   r   r         )r   r   r   nn	Parametertorchfullr   gainr"   numelscaler   )r   in_channelsr   r   r   r   r   r   r   gammar   	gain_initr   s               r   r   zScaledStdConv2d.__init__U   s     ?!+vx@G{67]e 	 	& LLT->->1a,H)!TU	T[[^113t;;
r   c           	         t        j                  | j                  j                  d| j                  d      d d | j
                  | j                  z  j                  d      dd| j                        j                  | j                        }t        j                  ||| j                  | j                  | j                  | j                  | j                        S Nr   r   Tr   )r"   r   r   r   )r    r!   r"   r#   r   rA   rC   viewr   r$   r%   r   r   r   r   r   r&   s      r   r(   zScaledStdConv2d.forwarda   s    KK4#4#4b94II

*004BDHH6 7Aj6M 	 xx6499dkk4<<X\XcXcddr   )r   Nr   r   T      ?r)   rJ   r*   r0   s   @r   r:   r:   L   s     MQLO
er   r:   c                   .     e Zd ZdZ	 	 d fd	Zd Z xZS )ScaledStdConv2dSamea\  Conv2d layer with Scaled Weight Standardization and Tensorflow-like SAME padding support

    Paper: `Characterizing signal propagation to close the performance gap in unnormalized ResNets` -
        https://arxiv.org/abs/2101.08692

    NOTE: the operations used in this impl differ slightly from the DeepMind Haiku impl. The impact is minor.
    c           
      <   t        ||||      \  }}t        | 	  ||||||||       t        j                  t        j                  | j                  dddf|            | _        |	| j                  d   j                         dz  z  | _        || _        |
| _        y )Nr4   r
   r   r   r<   )r   r   r   r=   r>   r?   r@   r   rA   r"   rB   rC   r5   r   )r   rD   r   r   r   r   r   r   r   rE   r   rF   r6   r   s                r   r   zScaledStdConv2dSame.__init__r   s     0V^fg{67]e 	 	& LLT->->1a,H)!TU	T[[^113t;;
"r   c           	          | j                   r,t        || j                  | j                  | j                        }t        j                  | j                  j                  d| j                  d      d d | j                  | j                  z  j                  d      dd| j                        j                  | j                        }t        j                  ||| j                   | j                  | j"                  | j                  | j$                        S rH   )r5   r   r   r   r   r    r!   r"   r#   r   rA   rC   rI   r   r$   r%   r   r   r   r&   s      r   r(   zScaledStdConv2dSame.forward~   s    ==D,,dkk4==IAKK4#4#4b94II

*004BDHH6 7Aj6M 	 xx6499dkk4<<X\XcXcddr   )r   r8   r   r   TrJ   r)   rJ   r*   r0   s   @r   rL   rL   i   s     MSLO
er   rL   )r.   r?   torch.nnr=   torch.nn.functional
functionalr    r   r   r   r   Conv2dr   r2   r:   rL    r   r   <module>rT      sb   $     = =		 0BII 4ebii e:e")) er   