
    kh]                        d Z ddlZddlZddlmZmZmZmZmZm	Z	 ddl
Z
ddl
mZ ddlmc mZ ddlmZmZ ddlmZ ddlmZ  ej,                  e      Z G d	 d
ej2                        Z G d de      Z	 	 	 d"dee   dededefdZe
j@                  Z!e!fdeeef   deeef   dedede
jD                  de
jF                  de
jH                  fdZ%e!fde
jH                  de
jH                  deeef   de
jF                  de
jF                  de
jH                  fdZ&	 	 	 d"de
jH                  dee   dededef
dZ' G d dej2                        Z( G d  d!ej2                        Z)y)#a?   Image to Patch Embedding using Conv2d

A convolution based approach to patchifying a 2D image w/ embedding projection.

Based on code in:
  * https://github.com/google-research/vision_transformer
  * https://github.com/google-research/big_vision/tree/main/big_vision

Hacked together by / Copyright 2020 Ross Wightman
    N)CallableDictListOptionalTupleUnion)nn   )Formatnchw_to)	to_2tuple)_assertc                   |    e Zd ZU dZeed<   ej                  j                  e	   ed<   	 	 	 	 	 	 	 	 	 	 dde
eeeef   f   dedededee   d	e	dee   d
e	de	de	f fdZde
eeeef   f   fdZ	 	 ddee
eeeef   f      dee
eeeef   f      fdZdde
eeef   ef   fdZdeeef   deeef   fdZd Z xZS )
PatchEmbed! 2D Image to Patch Embedding
    
output_fmtdynamic_img_padimg_size
patch_sizein_chans	embed_dim
norm_layerflattenbiasstrict_img_sizec                    t         |           t        |      | _        | j	                  |      \  | _        | _        | _        |d| _        t        |      | _
        n|| _        t        j                  | _
        |	| _        |
| _        t        j                  |||||      | _        |r ||      | _        y t        j"                         | _        y )NFkernel_sizestrider   )super__init__r   r   _init_img_sizer   	grid_sizenum_patchesr   r   r   NCHWr   r   r	   Conv2dprojIdentitynorm)selfr   r   r   r   r   r   r   r   r   r   	__class__s              S/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/layers/patch_embed.pyr!   zPatchEmbed.__init__    s     	#J/:>:M:Mh:W7t~t'7! DL$Z0DO #DL$kkDO..IIh	zR\cgh	-7Jy)	R[[]	    c                     | j                   sJ |yt        |      }t        t        || j                         D cg c]
  \  }}||z   c}}      }|d   |d   z  }|||fS c c}}w )N)NNNr   r
   )r   r   tuplezip)r*   r   spr#   r$   s         r,   r"   zPatchEmbed._init_img_size>   sn    #X&c(DOO.LMda16MN	lYq\1K// Ns   A"
c           	         d }|t        |      }||| j                  k7  rt        j                         5  t	        j
                  | j                  j                  | j                  j                  ||| j                  j                  d u      }|j                  j                  t        | j                  j                  |d             | j                  j                  /|j                  j                  | j                  j                         || _        d d d        || _        |xs | j                  }|| j                  k7  s|%| j                  |      \  | _        | _        | _        y y # 1 sw Y   WxY w)Nr   T)verbose)r   r   torchno_gradr	   r&   r'   in_channelsout_channelsr   weightcopy_resample_patch_embedr   r"   r#   r$   )r*   r   r   new_patch_sizenew_projs        r,   set_input_sizezPatchEmbed.set_input_sizeG   s+   
 !&z2N%.DOO*K %99II))II** .)t3 %%&:499;K;K^ei&jk99>>-MM''		7$	% -DO,t}}t}}$(B>B>Q>QRZ>[;DM4>4+; )C% %s   C E--E6returnc                 H    |rt        | j                        S | j                  S N)maxr   )r*   	as_scalars     r,   
feat_ratiozPatchEmbed.feat_ratioa   s    t''??"r-   c                    | j                   rPt        j                  |d   | j                  d   z        t        j                  |d   | j                  d   z        fS |d   | j                  d   z  |d   | j                  d   z  fS )z Get grid (feature) size for given image size taking account of dynamic padding.
        NOTE: must be torchscript compatible so using fixed tuple indexing
        r   r
   )r   mathceilr   )r*   r   s     r,   dynamic_feat_sizezPatchEmbed.dynamic_feat_sizeg   s     99Xa[4??1+==>		(ST+X\XgXghiXjJj@kkkA;$//!"44hqkT__UVEW6WWWr-   c                    |j                   \  }}}}| j                  | j                  rat        || j                  d   k(  d| d| j                  d    d       t        || j                  d   k(  d| d| j                  d    d       nr| j                  sft        || j
                  d   z  dk(  d| d| j
                  d    d       t        || j
                  d   z  dk(  d| d| j
                  d    d       | j                  r~| j
                  d   || j
                  d   z  z
  | j
                  d   z  }| j
                  d   || j
                  d   z  z
  | j
                  d   z  }t        j                  |d|d|f      }| j                  |      }| j                  r"|j                  d      j                  dd      }n3| j                  t        j                  k7  rt        || j                        }| j                  |      }|S )	Nr   zInput height (z) doesn't match model ().r
   zInput width (z%) should be divisible by patch size (   )shaper   r   r   r   r   Fpadr'   r   	transposer   r   r%   r   r)   )r*   xBCHWpad_hpad_ws           r,   forwardzPatchEmbed.forwardp   s   WW
1a==$##T]]1--sBYZ^ZgZghiZjYkkm/noT]]1--qcAXY]YfYfghYiXjjl/mn))**a/$QC'LT__]^M_L``bc **a/#A3&KDOO\]L^K__ab __Q'!dooa.@*@@DOOTUDVVE__Q'!dooa.@*@@DOOTUDVVEa!UAu-.AIIaL<<		!&&q!,A__+4??+AIIaLr-   )
            NTNTTF)NN)T)__name__
__module____qualname____doc__r   __annotations__r5   jitFinalboolr   intr   r   r   strr!   r"   r>   rD   rH   rW   __classcell__r+   s   @r,   r   r      s   YY__T** 58  -1 (,$($)KCsCx01K K 	K
 K !*K K !K K "K "K<0uS%S/-A'B 0 ?C@D\uS%S/%9:;\ !sE#s(O';!<=\4#E%S/32F,G #X%S/ XeCHo Xr-   r   c                        e Zd ZU dZeed<   	 	 	 	 	 	 	 	 ddee   dedededee   de	dee
   d	e	f fd
Zdeej                  ee   f   fdZ xZS )PatchEmbedWithSizer   r   r   r   r   r   r   r   r   c	           
      4    t         	|   ||||||||       y )N)r   r   r   r   r   r   r   r   )r    r!   )
r*   r   r   r   r   r   r   r   r   r+   s
            r,   r!   zPatchEmbedWithSize.__init__   s/     	!!! 	 		
r-   r?   c                 4   |j                   \  }}}}| j                  ft        || j                  d   z  dk(  d| d| j                  d    d       t        || j                  d   z  dk(  d| d| j                  d    d       | j	                  |      }|j                   dd  }| j
                  r"|j                  d      j                  dd      }n3| j                  t        j                  k7  rt        || j                        }| j                  |      }||fS )	Nr   zInput image height (z#) must be divisible by patch size (rJ   r
   zInput image width (rK   )rL   r   r   r   r'   r   rO   r   r   r%   r   r)   )r*   rP   rQ   rR   rS   rT   	feat_sizes          r,   rW   zPatchEmbedWithSize.forward   s+   WW
1a==$A**a/3GsJmnrn}n}~  oA  nB  BD  2E  FA**a/3FqcIlmqm|m|}~m  mA  AC  2D  EIIaLGGBCL	<<		!&&q!,A__+4??+AIIaL)|r-   )rX   rY   rZ   r[   NTNT)r\   r]   r^   r_   r   r`   r   rd   r   rc   re   r!   r   r5   Tensorr   rW   rf   rg   s   @r,   ri   ri      s     '*  -1 (,
sm
 
 	

 
 !*
 
 !
 
,E%,,S	"9: r-   ri   new_sizeinterpolation	antialiasr4   c           	         ddl 	 ddlm} t        | j                        dk(  sJ d       t              dk(  sJ d       | j                  dd }t        |      t              k(  r| S |r)t        j                  d	| j                   d
 d d       fdfd} ||      }t        j                  j                  j                  |j                        | j                        fd}	 | ||	dd      dd      }
| j                  }| j!                         }  |
|       } | j#                  |      } | S # t        $ r
 ddlm} Y Aw xY w)a/  Resample the weights of the patch embedding kernel to target resolution.
    We resample the patch embedding kernel by approximately inverting the effect
    of patch resizing.

    Code based on:
      https://github.com/google-research/big_vision/blob/b00544b81f8694488d5f36295aeb7972f3755ffe/big_vision/models/proj/flexi/vit.py

    With this resizing, we can for example load a B/8 filter into a B/16 model
    and, on 2x larger input image, the result will match.

    Args:
        patch_embed: original parameter to be resized.
        new_size (tuple(int, int): target shape (height, width)-only.
        interpolation (str): interpolation for resize
        antialias (bool): use anti-aliasing filter in resize
        verbose (bool): log operation
    Returns:
        Resized patch embedding kernel.
    r   N)vmap   zFour dimensions expectedrK   zNew shape should only be hwrl   zResize patch embedding z to z, w/ z interpolation.c                     t        j                  |       d   }t        j                  ||      d   j	                         }|S )N)NN.)sizemoderq   )r   r   .)r5   rn   rM   interpolatenumpy)x_np	_new_sizex_tfx_upsampledrq   rp   s       r,   resizez(resample_patch_embed_old.<locals>.resize   sI    ||D!/2mmy}	KKTVV[V[V] 	r-   c                    g }t        j                  |             D ]O  }j                  |       }d|j                  ||       <   |j	                   ||      j                  d             Q j                  |      j                  S )Ng      ?)rangeprodzerosunravel_indexappendreshapestackT)	_old_sizer{   mati	basis_vecnpr~   s        r,   get_resize_matz0resample_patch_embed_old.<locals>.get_resize_mat   s~    rwwy)* 	AA+I8:Ib&&q)45JJvi3;;B?@	A xx}r-   )devicec                 N    | j                  d      z  }|j                        S )Nr   )r   )kernelresampled_kernelro   resize_mat_pinvs     r,   resample_kernelz1resample_patch_embed_old.<locals>.resample_kernel   s(    *V^^B-??''11r-   r
   )ry   r5   rs   ImportError	functorchlenrL   r/   _loggerinfotensorlinalgpinvr   r   dtypefloatto)patch_embedro   rp   rq   r4   rs   old_sizer   
resize_matr   v_resample_kernel
orig_dtyper   r~   r   s    ```        @@@r,   resample_patch_embed_oldr      sM   4 # {  !Q&B(BB&x=A<<<  %HX%/).{/@/@.AhZuUbTccrst  (3Jll299>>*,,#?HZHZ[O2 T/1a8!Q?""J##%K#K0K..,KM  #""#s   E EEr   r   r   r?   c                    | \  }}|\  }}	||z  }
||	z  }t        j                  |
||      }|j                  |
d||      }t        j                  ||||d      }|j                  d      j                  ddd      j                  ||
      }|S )zKComputes the resize matrix basis vectors and interpolates them to new_size.)r   r   r
   F)rv   rw   rq   align_cornersrK   r   )r5   eyer   rM   rx   squeezepermute)r   ro   rp   rq   r   r   old_hold_wnew_hnew_w	old_total	new_total
eye_matrixbasis_vectors_batchresized_basis_vectors_batchresize_matrixs                   r,   _compute_resize_matrixr     s     LE5LE5II9V5AJ$,,Y5%H"#--# 077:BB1aKSST]_hiMr-   r   pinv_matrixnew_size_tupler   intermediate_dtypec                     | j                   ^}}}| j                  ||d      j                  |      } |j                  |      }| |z  } |j                  ||g| j                  |      }|S )zW Simplified resampling w/o vmap use.
    As proposed by https://github.com/stas-sl
    r   )r   )rL   r   r   )	r   r   r   r   r   c_outc_in_resampled_patch_embeds	            r,   _apply_resamplingr     s     "''OE4!%%eT2699@R9SK..'9.:K'+59199%WWZZakZl  r-   c                 ~   t        | j                        dk(  sJ d       t        |      dk(  sJ d       t        | j                  dd       }t        |      }||k(  r| S | j                  }| j                  }t        |||||t              }	t        j                  j                  |	      }
t        | |
||t              }|S )z5 Standalone function (computes matrix on each call). rt   z/Input tensor should be 4D (out_ch, in_ch, h, w)rK   z+New shape should only be hw (height, width)rl   N)r   rL   r/   r   r   r   DTYPE_INTERMEDIATEr5   r   r   r   )r   ro   rp   rq   r4   old_size_tupler   r   r   r   r   r   s               r,   r;   r;   .  s     {  !Q&Y(YY&x=ALLL&+K,=,=bc,B&CN&+HoN'F""J'y&J\J ,,##J/K-[.*>P ! r-   c            	            e Zd ZdZ	 	 ddeeef   dedef fdZe	fdeeef   de
j                  de
j                  d	e
j                  fd
Zde
j                  dee   d	e
j                  fdZ xZS ) PatchEmbedResamplerFixedOrigSizez
    Resample patch embedding weights from a fixed original size,
    caching the pseudoinverse matrix based on the target size.
    	orig_sizerp   rq   c                     t         |           t        |t              rt	        |      dk(  sJ d       || _        || _        || _        i | _        y)z
        Args:
            orig_size (Tuple[int, int]): The expected original (height, width) of input patch_embed tensors.
            interpolation (str): Interpolation mode.
            antialias (bool): Use anti-aliasing filter in resize.
        rK   z.`orig_size` must be a tuple of (height, width)N)	r    r!   
isinstancer/   r   r   rp   rq   _pinv_cache_map)r*   r   rp   rq   r+   s       r,   r!   z)PatchEmbedResamplerFixedOrigSize.__init__Q  sS     	)U+I!0C 	=<	=C"*";=r-   ro   r   r   r?   c                    |}| j                   j                  |      }|r8t        | |      r,t        | |      }|j                  |k(  r|j
                  |k(  r|S t        | j                  || j                  | j                  ||      }t        j                  j                  |      }d|d    d|d    }t        | |      rt        | |       | j                  ||       || j                   |<   |S )zRRetrieves the cached pinv matrix or computes and caches it for the given new_size.pinv_r   rP   r
   )r   gethasattrgetattrr   r   r   r   rp   rq   r5   r   r   delattrregister_buffer)r*   ro   r   r   	cache_keybuffer_namer   r   s           r,   _get_or_create_pinv_matrixz;PatchEmbedResamplerFixedOrigSize._get_or_create_pinv_matrixf  s     	**..y9745!$4K!!V+0A0AU0J## ,NNHd&8&8$..&RW

 ll''
3 hqk]!HQK=94%T;'[+6*5Y'r-   r   c                 ~   t        |j                        dk(  sJ t        |      dk(  sJ t        |j                  dd       }|| j                  k(  sJ d| d| j                          t        |      }| j                  |k(  r|S |j                  }|j
                  }| j                  ||      }t        ||||      }|S )a   Resamples the patch embedding weights to new_size.

        Args:
            patch_embed (torch.Tensor): Original weights (out_ch, in_ch, H_orig, W_orig).
            new_size (List[int]): Target [height, width].

        Returns:
            torch.Tensor: Resampled weights.
        rt   rK   rl   NzInput patch_embed spatial size z0 does not match module's expected original size )r   rL   r/   r   r   r   r   r   )	r*   r   ro   
input_sizer   r   r   r   r   s	            r,   rW   z(PatchEmbedResamplerFixedOrigSize.forward  s     ;$$%***8}!!! ;,,RS12
T^^+ 	@-j\ ://3~~.>@	@+ +0/ >>^+## &&
 55nfM !2+{N\f g$$r-   )bicubicT)r\   r]   r^   r_   r   rd   re   rc   r!   r   r5   r   r   rn   r   r   rW   rf   rg   s   @r,   r   r   L  s     '	>c?> > 	>2 0	S/  {{	
 
<"%5<< "%49 "% "%r-   r   c                   f    e Zd ZdZ	 	 	 	 ddeeef   dedededef
 fdZde	j                  d	eeef   d
e	j                  fdZde	j                  d	eeef   d
e	j                  fdZ	 	 	 dde	j                  de	j                  dee	j                     deeeef      ded
e	j                  fdZ xZS )PatchEmbedInterpolatora8  Dynamically interpolates patch embedding weights for variable patch sizes.

    This module wraps patch embedding weight resampling functionality to support
    on-the-fly patch size variation during training. It handles both Conv2d and
    Linear patch embeddings.

    Args:
        base_patch_size: The original patch size the model was initialized with
        in_chans: Number of input channels
        embed_dim: Embedding dimension
        interpolation: Interpolation mode for resampling
        antialias: Whether to use antialiasing during interpolation
    base_patch_sizer   r   rp   rq   c                 h    t         |           || _        || _        || _        || _        || _        y rA   )r    r!   r   r   r   rp   rq   )r*   r   r   r   rp   rq   r+   s         r,   r!   zPatchEmbedInterpolator.__init__  s5     	. "*"r-   r9   target_patch_sizer?   c                 j   || j                   k(  r|S |j                  d   }| j                   \  }}|\  }}|j                  |||| j                        }|j	                  dddd      }t        |||g| j                  | j                  d      }	|	j	                  dddd      }
|
j                  |d      }
|
S )a/  Resample linear patch embedding weights for a new patch size.

        Args:
            weight: Linear weight tensor of shape [embed_dim, patch_h * patch_w * in_chans]
            target_patch_size: Target (patch_h, patch_w) to resample to

        Returns:
            Resampled weight tensor
        r   rZ   r
   rK   Fro   rp   rq   r4   r   )r   rL   r   r   r   r;   rp   rq   )r*   r9   r   r   base_phbase_pw	target_ph	target_pwweight_convweight_conv_resampledweight_resampleds              r,   resample_linear_weightz-PatchEmbedInterpolator.resample_linear_weight  s      4 44MLLO	//0	9 nnY$--P!))!Q15 !5+,,nn!
 188Aq!D+33IrBr-   c                     || j                   k(  r|S t        |t        |      | j                  | j                  d      }|S )a-  Resample conv2d patch embedding weights for a new patch size.

        Args:
            weight: Conv2d weight tensor of shape [embed_dim, in_chans, patch_h, patch_w]
            target_patch_size: Target (patch_h, patch_w) to resample to

        Returns:
            Resampled weight tensor
        Fr   )r   r;   listrp   rq   )r*   r9   r   r   s       r,   resample_conv_weightz+PatchEmbedInterpolator.resample_conv_weight  sK      4 44M 0+,,,nn
  r-   patchesproj_weight	proj_biasr   	is_linearc                    || j                   }|r|| j                   k7  rz|j                  dk(  sJ d       |j                  \  }}}}	}
| j                  ||      }|j	                  ||d      }t
        j                  j                  j                  |||      }|S |j                  dk(  r%|j                  \  }}}}	}
|j	                  ||d      }t
        j                  j                  j                  |||      }|S || j                   k7  rB| j                  ||      }t
        j                  j                  j                  ||||d      }|S t
        j                  j                  j                  ||||d      }|S )aW  Apply patch embedding with dynamic weight resampling.

        Args:
            patches: Input patches
                - For linear mode with resampling: [B, N, Ph, Pw, C]
                - For linear mode without resampling: [B, N, Ph*Pw*C]
                - For conv mode: [B, C, H, W]
            proj_weight: Original projection weight
            proj_bias: Optional projection bias
            patch_size: Current patch size (if None, uses base_patch_size)
            is_linear: Whether using linear (True) or conv2d (False) projection

        Returns:
            Embedded patches
           z0Patches must be [B, N, Ph, Pw, C] for resamplingr   r   )r   padding)r   ndimrL   r   r   r5   r	   
functionallinearr   conv2d)r*   r   r   r   r   r   rQ   NPhPwrR   r   patches_flatoutputs                 r,   rW   zPatchEmbedInterpolator.forward  s   . --JT111||q(\*\\(")--1b"a $(#>#>{J#W   'q!R8,,33LBRT]^* % <<1$&-mmOAq"b!%ooaB7G,,33G[)T  T111#'#<#<[*#U ,,33-y%q 4   ,,33[)%q 4 
 r-   )rZ   r[   r   T)NNT)r\   r]   r^   r_   r   rd   re   rc   r!   r5   rn   r   r   r   rW   rf   rg   s   @r,   r   r     s#   " &#sCx# # 	#
 # #( (  !c?(  
	( T   !c?  
	 @ -104:: \\: ELL)	:
 U38_-: : 
:r-   r   )r   TF)*r_   loggingrF   typingr   r   r   r   r   r   r5   r	   torch.nn.functionalr   rM   formatr   r   helpersr   trace_utilsr   	getLoggerr\   r   Moduler   ri   rd   re   rc   r   float32r   r   r   rn   r   r   r;   r   r    r-   r,   <module>r     s  	   ? ?     #   
'

H
%o od( (^ 'Cs)C C 	C
 CL ]]  ,CHoCHo  	
 LL ;; \\@ '9!!! #s(O! 	!
 ! \\!* '!\\!s)! ! 	!
 !<Z%ryy Z%z^RYY ^r-   