
    kh                        d dl Z d dlmZ d dlmZmZmZmZ d dlZd dl	m
c mZ d dlm
Z
mZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ d
dlmZmZmZ d
dlmZ d
dlmZm Z  g dZ!dej                  dej                  fdZ"ejF                  jI                  d       dej                  dej                  dee%   dej                  fdZ&ejF                  jI                  d        G d de
jN                        Z( G d de
jN                        Z)	 	 	 	 	 	 dSdededed edee%   d!e%d"ee%   d#e*d$e*d%ee   d&ee   d'eej                     d(e+defd)Z,ejF                  jI                  d*        G d+ d,e
jN                        Z- G d- d.e-      Z. G d/ d0e
jN                        Z/ G d1 d2e/      Z0 G d3 d4e
jN                        Z1d5ee%   d6e%d7ee%   d!ee%   dee%   d8e*d9ee   d:e+d;ede1fd<Z2d=eiZ3 G d> d?e      Z4 G d@ dAe      Z5 G dB dCe      Z6 G dD dEe      Z7 G dF dGe      Z8 G dH dIe      Z9 e        e dJe4jt                  fK      dddLd9ee4   d:e+d;ede1fdM              Z; e        e dJe5jt                  fK      dddLd9ee5   d:e+d;ede1fdN              Z< e        e dJe6jt                  fK      dddLd9ee6   d:e+d;ede1fdO              Z= e        e dJe7jt                  fK      dddLd9ee7   d:e+d;ede1fdP              Z> e        e dJe8jt                  fK      dddLd9ee8   d:e+d;ede1fdQ              Z? e        e dJe9jt                  fK      dddLd9ee9   d:e+d;ede1fdR              Z@y)T    N)partial)AnyCallableListOptional)nnTensor   )MLPPermute)StochasticDepth)ImageClassificationInterpolationMode)_log_api_usage_once   )register_modelWeightsWeightsEnum)_IMAGENET_CATEGORIES)_ovewrite_named_paramhandle_legacy_interface)SwinTransformerSwin_T_WeightsSwin_S_WeightsSwin_B_WeightsSwin_V2_T_WeightsSwin_V2_S_WeightsSwin_V2_B_Weightsswin_tswin_sswin_b	swin_v2_t	swin_v2_s	swin_v2_bxreturnc           
      ,   | j                   dd  \  }}}t        j                  | ddd|dz  d|dz  f      } | ddd ddd dd d f   }| ddd ddd dd d f   }| ddd ddd dd d f   }| ddd ddd dd d f   }t        j                  ||||gd      } | S )Nr   r
   .r   )shapeFpadtorchcat)r%   HW_x0x1x2x3s           _/var/www/teggl/fontify/venv/lib/python3.12/site-packages/torchvision/models/swin_transformer.py_patch_merging_padr7   #   s    ggbclGAq!	a!Q1q5!QU+,A	
31addA	B	
31addA	B	
31addA	B	
31addA	B		2r2r"B'AH    r7   relative_position_bias_tablerelative_position_indexwindow_sizec                     |d   |d   z  }| |   }|j                  ||d      }|j                  ddd      j                         j                  d      }|S )Nr   r   r)   r
   )viewpermute
contiguous	unsqueeze)r9   r:   r;   Nrelative_position_biass        r6   _get_relative_position_biasrC   1   sg     	AQ'A9:QR388ArB3;;Aq!DOOQ[[\]^!!r8   rC   c                   n     e Zd ZdZej
                  fdededej                  f   f fdZ	de
fdZ xZS )PatchMergingzPatch Merging Layer.
    Args:
        dim (int): Number of input channels.
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
    dim
norm_layer.c                     t         |           t        |        || _        t	        j
                  d|z  d|z  d      | _         |d|z        | _        y N   r
   Fbiassuper__init__r   rF   r   Linear	reductionnormselfrF   rG   	__class__s      r6   rO   zPatchMerging.__init__E   I    D!1s7AG%@q3w'	r8   r%   c                 `    t        |      }| j                  |      }| j                  |      }|S z
        Args:
            x (Tensor): input tensor with expected layout of [..., H, W, C]
        Returns:
            Tensor with layout of [..., H/2, W/2, 2*C]
        )r7   rR   rQ   rT   r%   s     r6   forwardzPatchMerging.forwardL   s.     q!IIaLNN1r8   __name__
__module____qualname____doc__r   	LayerNormintr   ModulerO   r	   rZ   __classcell__rU   s   @r6   rE   rE   >   ?     IK (C (Xc299n-E (
 
r8   rE   c                   n     e Zd ZdZej
                  fdededej                  f   f fdZ	de
fdZ xZS )PatchMergingV2zPatch Merging Layer for Swin Transformer V2.
    Args:
        dim (int): Number of input channels.
        norm_layer (nn.Module): Normalization layer. Default: nn.LayerNorm.
    rF   rG   .c                     t         |           t        |        || _        t	        j
                  d|z  d|z  d      | _         |d|z        | _        y rI   rM   rS   s      r6   rO   zPatchMergingV2.__init__`   rV   r8   r%   c                 `    t        |      }| j                  |      }| j                  |      }|S rX   )r7   rQ   rR   rY   s     r6   rZ   zPatchMergingV2.forwardg   s.     q!NN1IIaLr8   r[   rd   s   @r6   rg   rg   Y   re   r8   rg   Tinput
qkv_weightproj_weightrB   	num_heads
shift_sizeattention_dropoutdropoutqkv_bias	proj_biaslogit_scaletrainingc           	      H   | j                   \  }}}}|d   ||d   z  z
  |d   z  }|d   ||d   z  z
  |d   z  }t        j                  | ddd|d|f      }|j                   \  }}}}|j                         }|d   |k\  rd|d<   |d   |k\  rd|d<   t	        |      dkD  r"t        j                  ||d    |d    fd      }||d   z  ||d   z  z  }|j                  |||d   z  |d   ||d   z  |d   |      }|j                  dddddd      j                  ||z  |d   |d   z  |      }|;|	9|	j                         }	|	j                         dz  }|	|d|z   j                          t        j                  |||	      }|j                  |j                  d      |j                  d      d|||z        j                  ddddd      }|d   |d   |d   }}}|}t        j                  |d
      t        j                  |d
      j!                  dd
      z  }t        j"                  |t%        j&                  d            j)                         }||z  }n,|||z  dz  z  }|j+                  |j!                  dd
            }||z   }t	        |      dkD  r|j-                  ||f      }d|d    f|d    |d    f|d    d	ff}d|d    f|d    |d    f|d    d	ff} d}!|D ]%  }"| D ]  }#|!||"d   |"d   |#d   |#d   f<   |!dz  }!  ' |j                  ||d   z  |d   ||d   z  |d         }|j                  dddd      j                  ||d   |d   z        }|j/                  d      |j/                  d      z
  }|j1                  |dk7  t3        d            j1                  |dk(  t3        d            }|j                  |j                  d      |z  |||j                  d      |j                  d            }||j/                  d      j/                  d      z   }|j                  d
||j                  d      |j                  d            }t        j4                  |d
      }t        j6                  |||      }|j+                  |      j!                  dd      j                  |j                  d      |j                  d      |      }t        j                  |||
      }t        j6                  |||      }|j                  |||d   z  ||d   z  |d   |d   |      }|j                  dddddd      j                  ||||      }t	        |      dkD  r t        j                  ||d   |d   fd      }|d	d	d	|d	|d	d	f   j9                         }|S )a  
    Window based multi-head self attention (W-MSA) module with relative position bias.
    It supports both of shifted and non-shifted window.
    Args:
        input (Tensor[N, H, W, C]): The input tensor or 4-dimensions.
        qkv_weight (Tensor[in_dim, out_dim]): The weight tensor of query, key, value.
        proj_weight (Tensor[out_dim, out_dim]): The weight tensor of projection.
        relative_position_bias (Tensor): The learned relative position bias added to attention.
        window_size (List[int]): Window size.
        num_heads (int): Number of attention heads.
        shift_size (List[int]): Shift size for shifted window attention.
        attention_dropout (float): Dropout ratio of attention weight. Default: 0.0.
        dropout (float): Dropout ratio of output. Default: 0.0.
        qkv_bias (Tensor[out_dim], optional): The bias tensor of query, key, value. Default: None.
        proj_bias (Tensor[out_dim], optional): The bias tensor of projection. Default: None.
        logit_scale (Tensor[out_dim], optional): Logit scale of cosine attention for Swin Transformer V2. Default: None.
        training (bool, optional): Training flag used by the dropout parameters. Default: True.
    Returns:
        Tensor[N, H, W, C]: The output tensor after shifted window attention.
    r   r   )r   r
   )shiftsdims   r
   rJ      Nr)   )rF   g      Y@)maxg      g      Y        )prt   )r*   r+   r,   copysumr-   rollr=   r>   reshapeclonenumelzero_linearsize	normalize	transposeclampmathlogexpmatmul	new_zerosr@   masked_fillfloatsoftmaxrp   r?   )$rj   rk   rl   rB   r;   rm   rn   ro   rp   rq   rr   rs   rt   Br/   r0   Cpad_rpad_br%   r1   pad_Hpad_Wnum_windowslengthqkvqkvattn	attn_maskh_slicesw_slicescounthws$                                       r6   shifted_window_attentionr   t   s   F JAq!Q^a+a.00KNBE^a+a.00KNBE	eaAua/0AAueQ"J1~
11~
1 :JJq:a=.:a=.!AO KN*uA/FGK	q%;q>);q>5KPQN;RT_`aTbdefA			!Q1a#++AO[^kZ[n=\^_`A 8#7>>#!Q&!f*%++-
((1j(
+C
++affQiAy!y.
I
Q
QRSUVXY[\^_
`C!fc!fc!f!qA{{1"%A2(>(H(HR(PPkk+488E?CGGIk!iD((xxB+,((D
:KK/	Q(KN?Z]N*KzZ[}n^bMcdQ(KN?Z]N*KzZ[}n^bMcd 	A 6;	!A$1+qtad{23
	 NN5KN#:KNEU`abUcLcepqrest	%%aAq199+{ST~XcdeXfGfg	''*Y-@-@-CC	)))q.%-HTTU^bcUcejkneop	yyk1;	166RS9VWV\V\]^V_`i))!,66q99yyYq	166!9=99Tr"D99T.BDA  A&..qvvay!&&)QGA	K+A			!w2A 	
q%;q>)5KN+BKPQNT_`aTbdefA			!Q1a#++AueQ?A :JJq*Q-A!?fM 	
!RaR!Q,""$AHr8   r   c                        e Zd ZdZ	 	 	 	 ddedee   dee   dedededed	ef fd
Zd Z	d Z
dej                  fdZdedefdZ xZS )ShiftedWindowAttentionz/
    See :func:`shifted_window_attention`.
    rF   r;   rn   rm   rq   rr   ro   rp   c	                 p   t         	|           t        |      dk7  st        |      dk7  rt        d      || _        || _        || _        || _        || _        t        j                  ||dz  |      | _        t        j                  |||      | _        | j                          | j                          y )Nr
   z.window_size and shift_size must be of length 2rx   rK   )rN   rO   len
ValueErrorr;   rn   rm   ro   rp   r   rP   r   proj#define_relative_position_bias_tabledefine_relative_position_index)
rT   rF   r;   rn   rm   rq   rr   ro   rp   rU   s
            r6   rO   zShiftedWindowAttention.__init__   s     	{q C
Oq$8MNN&$"!299S#'9IIc3Y7	002++-r8   c                    t        j                  t        j                  d| j                  d   z  dz
  d| j                  d   z  dz
  z  | j
                              | _        t         j                  j                  | j                  d       y )Nr
   r   r   {Gz?std)	r   	Parameterr-   zerosr;   rm   r9   inittrunc_normal_rT   s    r6   r   z:ShiftedWindowAttention.define_relative_position_bias_table	  sw    ,.LLKKT--a0014T=M=Ma=P9PST9TUW[WeWef-
) 	d??TJr8   c                    t        j                  | j                  d         }t        j                  | j                  d         }t        j                  t        j                  ||d            }t        j
                  |d      }|d d d d d f   |d d d d d f   z
  }|j                  ddd      j                         }|d d d d dfxx   | j                  d   dz
  z  cc<   |d d d d dfxx   | j                  d   dz
  z  cc<   |d d d d dfxx   d| j                  d   z  dz
  z  cc<   |j                  d      j                         }| j                  d|       y )Nr   r   ijindexingr
   r)   r:   )
r-   aranger;   stackmeshgridflattenr>   r?   r   register_buffer)rT   coords_hcoords_wcoordscoords_flattenrelative_coordsr:   s          r6   r   z5ShiftedWindowAttention.define_relative_position_index  s?   << 0 0 34<< 0 0 34U^^HhNOvq1(At4~aqj7QQ)11!Q:EEG1a D$4$4Q$7!$;; 1a D$4$4Q$7!$;; 1a A(8(8(;$;a$?? "1"5"5b"9"A"A"C68OPr8   r&   c                 X    t        | j                  | j                  | j                        S N)rC   r9   r:   r;   r   s    r6   get_relative_position_biasz1ShiftedWindowAttention.get_relative_position_bias  s(    *--t/K/KTM]M]
 	
r8   r%   c                 h   | j                         }t        || j                  j                  | j                  j                  || j
                  | j                  | j                  | j                  | j                  | j                  j                  | j                  j                  | j                        S )
        Args:
            x (Tensor): Tensor with layout of [B, H, W, C]
        Returns:
            Tensor with same layout as input, i.e. [B, H, W, C]
        )rn   ro   rp   rq   rr   rt   )r   r   r   weightr   r;   rm   rn   ro   rp   rL   rt   rT   r%   rB   s      r6   rZ   zShiftedWindowAttention.forward#  s     "&!@!@!B'HHOOII"NN"44LLXX]]iinn]]
 	
r8   TTr|   r|   )r\   r]   r^   r_   ra   r   boolr   rO   r   r   r-   r	   r   rZ   rc   rd   s   @r6   r   r      s     #&.. #Y. I	.
 . . . !. .4KQ
ELL 


 
F 
r8   r   c                        e Zd ZdZ	 	 	 	 ddedee   dee   dedededed	ef fd
Zd Z	de
j                  fdZdefdZ xZS )ShiftedWindowAttentionV2z2
    See :func:`shifted_window_attention_v2`.
    rF   r;   rn   rm   rq   rr   ro   rp   c	           
      4   t         
|   ||||||||       t        j                  t	        j
                  dt	        j                  |ddf      z              | _        t        j                  t        j                  ddd      t        j                  d      t        j                  d|d	            | _        |r\| j                  j                  j                         d
z  }	| j                  j                  |	d|	z   j                  j!                          y y )N)rq   rr   ro   rp   
   r   r
   i   TrK   )inplaceFrx   )rN   rO   r   r   r-   r   onesrs   
SequentialrP   ReLUcpb_mlpr   rL   r   datar   )rT   rF   r;   rn   rm   rq   rr   ro   rp   r   rU   s             r6   rO   z!ShiftedWindowAttentionV2.__init__@  s     	/ 	 		
 <<		"uzz9aQRBS7T2T(UV}}IIa4("''$*?3PY`eAf
 XX]]((*a/FHHMM&1v:.3399; r8   c                    t        j                  | j                  d   dz
   | j                  d   t         j                        }t        j                  | j                  d   dz
   | j                  d   t         j                        }t        j                  t        j
                  ||gd            }|j                  ddd      j                         j                  d      }|d d d d d d dfxx   | j                  d   dz
  z  cc<   |d d d d d d dfxx   | j                  d   dz
  z  cc<   |dz  }t        j                  |      t        j                  t        j                  |      dz         z  d	z  }| j                  d
|       y )Nr   r   )dtyper   r   r
      g      ?g      @relative_coords_table)r-   r   r;   float32r   r   r>   r?   r@   signlog2absr   )rT   relative_coords_hrelative_coords_wr   s       r6   r   z<ShiftedWindowAttentionV2.define_relative_position_bias_table_  se   !LL4+;+;A+>+B)CTEUEUVWEX`e`m`mn!LL4+;+;A+>+B)CTEUEUVWEX`e`m`mn %ENN<MO`;alp,q r 5 = =aA F Q Q S ] ]^_ `aAqj)T-=-=a-@1-DD)aAqj)T-=-=a-@1-DD)"JJ,-

599EZ;[^a;a0bbehh 	 	46KLr8   r&   c                     t        | j                  | j                        j                  d| j                        | j
                  | j                        }dt        j                  |      z  }|S )Nr)      )	rC   r   r   r=   rm   r:   r;   r-   sigmoid)rT   rB   s     r6   r   z3ShiftedWindowAttentionV2.get_relative_position_biaso  s_    !<LL33499"dnnM(("

 "$emm4J&K!K%%r8   r%   c                 ~   | j                         }t        || j                  j                  | j                  j                  || j
                  | j                  | j                  | j                  | j                  | j                  j                  | j                  j                  | j                  | j                        S )r   )rn   ro   rp   rq   rr   rs   rt   )r   r   r   r   r   r;   rm   rn   ro   rp   rL   rs   rt   r   s      r6   rZ   z ShiftedWindowAttentionV2.forwardx  s     "&!@!@!B'HHOOII"NN"44LLXX]]iinn((]]
 	
r8   r   )r\   r]   r^   r_   ra   r   r   r   rO   r   r-   r	   r   rZ   rc   rd   s   @r6   r   r   ;  s     #&<< #Y< I	<
 < < < !< <>M &ELL &
 
r8   r   c                        e Zd ZdZddddej
                  efdededee   dee   de	d	e	d
e	de	de
dej                  f   de
dej                  f   f fdZdefdZ xZS )SwinTransformerBlocka  
    Swin Transformer Block.
    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (List[int]): Window size.
        shift_size (List[int]): Shift size for shifted window attention.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0.
        dropout (float): Dropout rate. Default: 0.0.
        attention_dropout (float): Attention dropout rate. Default: 0.0.
        stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
        norm_layer (nn.Module): Normalization layer.  Default: nn.LayerNorm.
        attn_layer (nn.Module): Attention layer. Default: ShiftedWindowAttention
          @r|   rF   rm   r;   rn   	mlp_ratiorp   ro   stochastic_depth_probrG   .
attn_layerc                 L   t         |           t        |         |	|      | _         |
||||||      | _        t        |d      | _         |	|      | _        t        |t        ||z        |gt        j                  d |      | _        | j                  j                         D ]~  }t        |t        j                        st        j                   j#                  |j$                         |j&                  Tt        j                   j)                  |j&                  d        y )N)ro   rp   row)activation_layerr   rp   gư>r   )rN   rO   r   norm1r   r   stochastic_depthnorm2r   ra   r   GELUmlpmodules
isinstancerP   r   xavier_uniform_r   rL   normal_)rT   rF   rm   r;   rn   r   rp   ro   r   rG   r   mrU   s               r6   rO   zSwinTransformerBlock.__init__  s     	D!_
/
	 !00Eu M_
sSy137"''[_ipq!!# 	6A!RYY'''166%GGOOAFFO5		6r8   r%   c                     || j                  | j                  | j                  |                  z   }|| j                  | j                  | j	                  |                  z   }|S r   )r   r   r   r   r   rY   s     r6   rZ   zSwinTransformerBlock.forward  sS    %%dii

1&>??%%dhhtzz!}&=>>r8   )r\   r]   r^   r_   r   r`   r   ra   r   r   r   rb   rO   r	   rZ   rc   rd   s   @r6   r   r     s    * #&'*/1||/E!6!6 !6 #Y	!6
 I!6 !6 !6 !!6  %!6 S"))^,!6 S"))^,!6F r8   r   c                        e Zd ZdZddddej
                  efdededee   dee   de	d	e	d
e	de	de
dej                  f   de
dej                  f   f fdZdefdZ xZS )SwinTransformerBlockV2a  
    Swin Transformer V2 Block.
    Args:
        dim (int): Number of input channels.
        num_heads (int): Number of attention heads.
        window_size (List[int]): Window size.
        shift_size (List[int]): Shift size for shifted window attention.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0.
        dropout (float): Dropout rate. Default: 0.0.
        attention_dropout (float): Attention dropout rate. Default: 0.0.
        stochastic_depth_prob: (float): Stochastic depth rate. Default: 0.0.
        norm_layer (nn.Module): Normalization layer.  Default: nn.LayerNorm.
        attn_layer (nn.Module): Attention layer. Default: ShiftedWindowAttentionV2.
    r   r|   rF   rm   r;   rn   r   rp   ro   r   rG   .r   c                 8    t         |   |||||||||	|

       y )N)r   rp   ro   r   rG   r   )rN   rO   )rT   rF   rm   r;   rn   r   rp   ro   r   rG   r   rU   s              r6   rO   zSwinTransformerBlockV2.__init__  s5     	/"7!! 	 	
r8   r%   c                     || j                  | j                  | j                  |                  z   }|| j                  | j                  | j	                  |                  z   }|S r   )r   r   r   r   r   rY   s     r6   rZ   zSwinTransformerBlockV2.forward  sU     %%djj1&>??%%djj!&=>>r8   )r\   r]   r^   r_   r   r`   r   ra   r   r   r   rb   rO   r	   rZ   rc   rd   s   @r6   r   r     s    * #&'*/1||/G

 
 #Y	

 I
 
 
 !
  %
 S"))^,
 S"))^,
4 r8   r   c                        e Zd ZdZdddddddefdee   ded	ee   d
ee   dee   dedededededee	de
j                  f      dee	de
j                  f      de	de
j                  f   f fdZd Z xZS )r   a;  
    Implements Swin Transformer from the `"Swin Transformer: Hierarchical Vision Transformer using
    Shifted Windows" <https://arxiv.org/abs/2103.14030>`_ paper.
    Args:
        patch_size (List[int]): Patch size.
        embed_dim (int): Patch embedding dimension.
        depths (List(int)): Depth of each Swin Transformer layer.
        num_heads (List(int)): Number of attention heads in different layers.
        window_size (List[int]): Window size.
        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim. Default: 4.0.
        dropout (float): Dropout rate. Default: 0.0.
        attention_dropout (float): Attention dropout rate. Default: 0.0.
        stochastic_depth_prob (float): Stochastic depth rate. Default: 0.1.
        num_classes (int): Number of classes for classification head. Default: 1000.
        block (nn.Module, optional): SwinTransformer Block. Default: None.
        norm_layer (nn.Module, optional): Normalization layer. Default: None.
        downsample_layer (nn.Module): Downsample layer (patch merging). Default: PatchMerging.
    r   r|   g?i  N
patch_size	embed_dimdepthsrm   r;   r   rp   ro   r   num_classesrG   .blockdownsample_layerc                 x   t         |           t        |        |
| _        |t        }|t        t        j                  d      }g }|j                  t        j                  t        j                  d||d   |d   f|d   |d   f      t        g d       ||                   t        |      }d}t        t        |            D ]  }g }|d|z  z  }t        ||         D ]Y  }|	t        |      z  |dz
  z  }|j                   ||||   ||D cg c]  }|dz  dk(  rdn|dz   c}|||||		             |dz  }[ |j                  t        j                  |        |t        |      dz
  k  s|j                   |||              t        j                  | | _        |dt        |      dz
  z  z  } ||      | _        t        g d
      | _        t        j&                  d      | _        t        j*                  d      | _        t        j.                  ||
      | _        | j3                         D ]~  }t5        |t        j.                        st        j6                  j9                  |j:                  d       |j<                  Vt        j6                  j?                  |j<                          y c c}w )Ngh㈵>)epsrx   r   r   )kernel_sizestride)r   r
   rx   r   r
   )r;   rn   r   rp   ro   r   rG   )r   rx   r   r
   r   r   ) rN   rO   r   r  r   r   r   r`   appendr   Conv2dr   r   ranger   r   featuresrR   r>   AdaptiveAvgPool2davgpoolFlattenr   rP   headr   r   r   r   r   rL   zeros_)rT   r   r   r  rm   r;   r   rp   ro   r   r  rG   r  r  layerstotal_stage_blocksstage_block_idi_stagestagerF   i_layersd_probr   num_featuresr   rU   s                            r6   rO   zSwinTransformer.__init__  s     	D!&=(E 48J"$MM		yz!}jm.LV`abVceopqerUs %9%	
 ![S[) 	AG%'Eaj(C 1 $/%2GGK]`aKab!'*$/OZ#[!1)9AqAv$E#["+ '*;.5#-
 !#!$" MM"--/0#f+/*.sJ?@/	A0 v. 1Vq#99|,	|,++A.zz!}IIlK8	 	+A!RYY'%%ahhD%966%GGNN166*		+- $\s   J7c                     | j                  |      }| j                  |      }| j                  |      }| j                  |      }| j	                  |      }| j                  |      }|S r   )r  rR   r>   r  r   r  rY   s     r6   rZ   zSwinTransformer.forward_  sV    MM!IIaLLLOLLOLLOIIaLr8   )r\   r]   r^   r_   rE   r   ra   r   r   r   r   rb   rO   rZ   rc   rd   s   @r6   r   r     s    4 #&'*9=485AM+IM+ M+ S		M+
 9M+ #YM+ M+ M+ !M+  %M+ M+ Xc299n56M+ bii01M+ #3		>2M+^r8   r   r   r   r  r   weightsprogresskwargsc           
          |#t        |dt        |j                  d                t        d| |||||d|}	|"|	j	                  |j                  |d             |	S )Nr  
categories)r   r   r  rm   r;   r   T)r  
check_hash )r   r   metar   load_state_dictget_state_dict)
r   r   r  rm   r;   r   r  r  r  models
             r6   _swin_transformerr&  i  s{     fmSl9S5TU 3 E g44hSW4XYLr8   r  c                   p    e Zd Z ed eeddej                        i eddddd	d
didddd      Z	e	Z
y)r   z7https://download.pytorch.org/models/swin_t-704ceda3.pth      	crop_sizeresize_sizeinterpolationibr(  r(  Uhttps://github.com/pytorch/vision/tree/main/references/classification#swintransformerImageNet-1KguV^T@glW@zacc@1zacc@5gX9@g\([@YThese weights reproduce closely the results of the paper using a similar training recipe.
num_paramsmin_sizerecipe_metrics_ops
_file_size_docsurl
transformsr"  Nr\   r]   r^   r   r   r   r   BICUBIC_COMMON_METAIMAGENET1K_V1DEFAULTr!  r8   r6   r   r     sg    E3CO`OhOh


""m##   t
M* Gr8   r   c                   p    e Zd Z ed eeddej                        i eddddd	d
didddd      Z	e	Z
y)r   z7https://download.pytorch.org/models/swin_s-5e29d889.pthr(     r*  irr.  r/  r0  gCT@gףp=
X@r1  gZd{!@gx&g@r2  r3  r;  Nr>  r!  r8   r6   r   r     sg    E3CO`OhOh


""m##  !t
M* Gr8   r   c                   p    e Zd Z ed eeddej                        i eddddd	d
didddd      Z	e	Z
y)r   z7https://download.pytorch.org/models/swin_b-68c6b09e.pthr(     r*  i<;r.  r/  r0  gh|?T@g)\(X@r1  g&1.@gt@r2  r3  r;  Nr>  r!  r8   r6   r   r     sg    E3CO`OhOh


""m##  !t
M* Gr8   r   c                   p    e Zd Z ed eeddej                        i eddddd	d
didddd      Z	e	Z
y)r   z:https://download.pytorch.org/models/swin_v2_t-b137f0e2.pth     r*  iRrH  rH  Xhttps://github.com/pytorch/vision/tree/main/references/classification#swintransformer-v2r0  gS㥛T@g rX@r1  g(\@gMb([@r2  r3  r;  Nr>  r!  r8   r6   r   r     sg    H3CO`OhOh


""p##  !t
M* Gr8   r   c                   p    e Zd Z ed eeddej                        i eddddd	d
didddd      Z	e	Z
y)r   z:https://download.pytorch.org/models/swin_v2_s-637d8ceb.pthrH  rI  r*  irJ  rK  r0  g!rhT@gNbX94X@r1  gd;O'@gg@r2  r3  r;  Nr>  r!  r8   r6   r   r     g    H3CO`OhOh


""p##  !t
M* Gr8   r   c                   p    e Zd Z ed eeddej                        i eddddd	d
didddd      Z	e	Z
y)r   z:https://download.pytorch.org/models/swin_v2_b-781e5279.pthrH  i  r*  i=rJ  rK  r0  gI+U@gK7X@r1  g33333S4@gˡEu@r2  r3  r;  Nr>  r!  r8   r6   r   r   	  rM  r8   r   
pretrained)r  )r  r  c                 d    t         j                  |       } t        dddgdg dg dddgd| |d|S )	a  
    Constructs a swin_tiny architecture from
    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/abs/2103.14030>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_T_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_T_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_T_Weights
        :members:
    rJ   `   r
   r
      r
   rx   rS           皙?r   r   r  rm   r;   r   r  r  r!  )r   verifyr&  r  r  r  s      r6   r   r   "  sP    . ##G,G 
q6 F!
 
 
r8   c                 d    t         j                  |       } t        dddgdg dg dddgd| |d|S )	a  
    Constructs a swin_small architecture from
    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/abs/2103.14030>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_S_Weights
        :members:
    rJ   rQ  r
   r
      r
   rT  rW  333333?rY  r!  )r   rZ  r&  r[  s      r6   r    r    H  sP    . ##G,G 
q6 F!
 
 
r8   c                 d    t         j                  |       } t        dddgdg dg dddgd| |d|S )	a  
    Constructs a swin_base architecture from
    `Swin Transformer: Hierarchical Vision Transformer using Shifted Windows <https://arxiv.org/abs/2103.14030>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_B_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_B_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_B_Weights
        :members:
    rJ      r]  rJ   r   r       rW        ?rY  r!  )r   rZ  r&  r[  s      r6   r!   r!   n  sP    . ##G,G 
q6 F!
 
 
r8   c                 x    t         j                  |       } t        dddgdg dg dddgd| |t        t        d
|S )	a  
    Constructs a swin_v2_tiny architecture from
    `Swin Transformer V2: Scaling Up Capacity and Resolution <https://arxiv.org/abs/2111.09883>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_V2_T_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_V2_T_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_V2_T_Weights
        :members:
    rJ   rQ  rR  rT  r   rX  
r   r   r  rm   r;   r   r  r  r  r  r!  )r   rZ  r&  r   rg   r[  s      r6   r"   r"     sV    .  &&w/G q6 F!$'  r8   c                 x    t         j                  |       } t        dddgdg dg dddgd| |t        t        d
|S )	a  
    Constructs a swin_v2_small architecture from
    `Swin Transformer V2: Scaling Up Capacity and Resolution <https://arxiv.org/abs/2111.09883>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_V2_S_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_V2_S_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_V2_S_Weights
        :members:
    rJ   rQ  r]  rT  r   r_  rf  r!  )r   rZ  r&  r   rg   r[  s      r6   r#   r#     sV    .  &&w/G q6 F!$'  r8   c                 x    t         j                  |       } t        dddgdg dg dddgd| |t        t        d
|S )	a  
    Constructs a swin_v2_base architecture from
    `Swin Transformer V2: Scaling Up Capacity and Resolution <https://arxiv.org/abs/2111.09883>`_.

    Args:
        weights (:class:`~torchvision.models.Swin_V2_B_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.Swin_V2_B_Weights` below for
            more details, and possible values. By default, no pre-trained
            weights are used.
        progress (bool, optional): If True, displays a progress bar of the
            download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.swin_transformer.SwinTransformer``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/swin_transformer.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.Swin_V2_B_Weights
        :members:
    rJ   ra  r]  rb  r   rd  rf  r!  )r   rZ  r&  r   rg   r[  s      r6   r$   r$     sV    .  &&w/G q6 F!$'  r8   )r|   r|   NNNT)Ar   	functoolsr   typingr   r   r   r   r-   torch.nn.functionalr   
functionalr+   r	   ops.miscr   r   ops.stochastic_depthr   transforms._presetsr   r   utilsr   _apir   r   r   _metar   _utilsr   r   __all__r7   fxwrapra   rC   rb   rE   rg   r   r   r   r   r   r   r   r   r&  r@  r   r   r   r   r   r   rA  r   r    r!   r"   r#   r$   r!  r8   r6   <module>rw     s     0 0     # 2 H ' 6 6 ' B"%,, 5<<  " #""',,"IN"dhildm"
\\" + ,299 6RYY F  #!%"&*.ppp p #	p
 cp p S	p p p vp p %,,'p p pf ( )N
RYY N
bS
5 S
l6299 6r/1 /djbii jZS	 I Cy	
 c ! k"   > &
[ 2[ 2[ 2 2 2 2 ,0L0L!MN26 !x/ !$ !Y\ !ap ! O !H ,0L0L!MN26 !x/ !$ !Y\ !ap ! O !H ,0L0L!MN26 !x/ !$ !Y\ !ap ! O !H ,0A0O0O!PQ8<t #(#45 # #_b #gv # R #L ,0A0O0O!PQ8<t #(#45 # #_b #gv # R #L ,0A0O0O!PQ8<t #(#45 # #_b #gv # R #r8   