
    kh                     <   d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
 ddlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZ 	 	 	 	 	 d&d
e	eef   de
ee	eef   f   dededee   dede	ee	eef   f   fdZ ed       ed      fZ G d dej:                  j<                        Z G d dej:                  j<                        Z  G d dej:                  j<                        Z! G d dej:                  j<                        Z"d'dZ# G d dej:                  j<                        Z$	 	 	 d(dejJ                  de	eef   d ed!ed"ede
ejJ                  e	ejJ                  ejJ                  ejJ                  f   f   fd#Z& G d$ d%ej:                  j<                        Z'y))az   NaFlex (NaViT + FlexiViT) Transforms and Collation

Implements PyTorch versions of the transforms described in the NaViT and FlexiViT papers:
- NaViT: https://arxiv.org/abs/2307.14995
- FlexiViT: https://arxiv.org/abs/2212.08013

Enables variable resolution/aspect ratio image handling with efficient patching.

Hacked together by / Copyright 2025, Ross Wightman, Hugging Face
    N)DictListOptionalSequenceTupleUnion)Image)
transforms)
functional)InterpolationMode   )str_to_interp_modecrop_or_padcenter_crop_or_padimage_hw
patch_sizemax_seq_lendivisible_by_patch	max_ratioepsreturnc                     t        |t              r||cnt        |      dk7  rt        d      |\  dk  sdk  rt        d       fdfd}|dz  }d}||z
  |k\  r||z   d	z  }	 ||	      r|	}n|	}||z
  |k\  r|}
|t	        |
|      }
|
|k  rt        d
      |
dk\  rt        d       |
      }|
|fS )a  Determine scaling ratio and image size for sequence length constraint.

    Calculates the scaling ratio needed so that when image_hw is scaled,
    the total number of resulting patches does not exceed max_seq_len.

    Args:
        image_hw: Original image dimensions (height, width).
        patch_size: Patch dimensions. If int, patches are square.
        max_seq_len: Maximum allowed sequence length.
        divisible_by_patch: Whether resulting dimensions must be divisible by patch_size.
        max_ratio: Optional cap on scaling ratio to prevent excessive upsampling.
        eps: Convergence threshold for binary search.

    Returns:
        Tuple of (ratio, target_hw) where ratio is the scaling factor and
        target_hw is the resulting (height, width) after scaling.
       Cpatch_size tuple must have exactly two elements (patch_h, patch_w).r   z'patch_size dimensions must be positive.c                     d   | z  }d   | z  }r6t        j                  |z        z  }t        j                  |z        z  }t        t        |            }t        t        |            }||fS )zYScale image_hw by ratio and optionally round dimensions to multiples of patch_h, patch_w.r   r   )mathceilintmax)ratioscaled_hscaled_wr   r   patch_hpatch_ws      W/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/data/naflex_transforms.pyprepare_target_hwz1get_image_size_for_seq.<locals>.prepare_target_hwA   s    A;&A;& 8g+=!>>H8g+=!>>H s8W-.s8W-.!!    c                 B     	|       \  }}|z  }|z  }||z  }|k  S )zACheck if scaling by 'ratio' keeps patch count within max_seq_len. )
r    t_ht_wnum_patches_hnum_patches_wseq_lenr   r#   r$   r&   s
         r%   is_feasiblez+get_image_size_for_seq.<locals>.is_feasibleQ   s:    $U+S ww-/+%%r'   g      $@g      Y@g       @z0Binary search failed - image might be too large?z0Binary search failed - image might be too small?)
isinstancer   len
ValueErrormin)r   r   r   r   r   r   r/   lbrbmidr    	target_hwr#   r$   r&   s   ` ``        @@@r%   get_image_size_for_seqr8      s	   6 *c"%z z?abcc% !|w!|BCC" 
& 
tB	B 7s
Bw#osBB 7s
 E E9% |KLL~KLL "%(I)r'   bilinearbicubicc                        e Zd ZdZ	 	 	 	 ddedededee   dee	e
ee
df   f   d	df fd
Zdej                  d	ej                  fdZ xZS )ResizeToSequencezResize image to fit within a maximum sequence length constraint when patchified.

    This maintains aspect ratio while ensuring the resulting image, when divided into patches,
    will not exceed the specified maximum sequence length.
    Nr   r   r   r   interpolation.r   c                     t         |           || _        || _        || _        || _        t        |t              r"|dk(  rt        | _	        yt        |      | _	        y|| _	        y)aj  Initialize ResizeToSequence transform.

        Args:
            patch_size: Size of patches.
            max_seq_len: Maximum sequence length constraint.
            divisible_by_patch: Whether dimensions must be divisible by patch_size.
            max_ratio: Optional cap on scaling ratio.
            interpolation: Interpolation method or methods.
        randomN)super__init__r   r   r   r   r0   str_RANDOM_INTERPOLATIONr=   r   )selfr   r   r   r   r=   	__class__s         r%   rA   zResizeToSequence.__init__   s]    " 	$&"4"mS)(%:"%7%F"!.Dr'   imgc                    t         j                  j                  |      \  }}}t        ||f| j                  | j
                  | j                  | j                        \  }}t        | j                  t        t        f      r t        j                  | j                        }n| j                  }t         j                  j                  |||d      }|S )zResize image to maintain aspect ratio and fit sequence constraint.

        Args:
            img: Input image tensor.

        Returns:
            Resized image tensor.
        )r   r   T)r=   	antialias)r
   r   get_dimensionsr8   r   r   r   r   r0   r=   tuplelistr?   choiceresize)rD   rF   _hwr7   r=   resized_imgs           r%   forwardzResizeToSequence.forward   s     ''66s;1a-FOO#66nn
9 d((5$-8"MM$*<*<=M ..M ++223	Q^jn2or'   )   TNr:   )__name__
__module____qualname____doc__r   boolr   floatr   rB   r   r   rA   torchTensorrR   __classcell__rE   s   @r%   r<   r<   ~   s      $'+)-Zc// / !%	/
  / !&7?PRU?U9V!VW/ />5<< ELL r'   r<   c                   d     e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 d fd	Ze	 	 	 	 	 	 dd       Zd Zd Z xZ	S )ResizeKeepRatioToSequencezS
    Resize and Keep Aspect Ratio, adapted to fit sequence length constraints.
    c                    t         |           || _        || _        || _        t        |      | _        |dk(  rt        | _        nt        |      | _        || _
        || _        || _        |	| _        |
| _        || _        y)ua  
        Args:
            patch_size: Size of patches (int or tuple of (patch_h, patch_w))
            max_sequence_len: Maximum allowed sequence length for the resulting image
            divisible_by_patch: If True, ensure dimensions are divisible by patch_size
            longest: Float between 0-1 where 0=shortest side, 1=longest side determines scale
            interpolation: Interpolation method for resizing
            random_scale_prob: Probability of applying random scaling
            random_scale_range: Range for random scaling factor (min, max)
            random_scale_area: If True, scale factors affect area (√ factor)
            random_aspect_prob: Probability of applying random aspect ratio jittering
            random_aspect_range: Range for random aspect ratio (min, max)
            max_ratio: Maximum allowed scaling ratio
        r?   N)r@   rA   r   max_sequence_lenr   rY   longestrC   r=   r   random_scale_probrandom_scale_rangerandom_scale_arearandom_aspect_probrandom_aspect_ranger   )rD   r   ra   r   rb   r=   rc   rd   re   rf   rg   r   rE   s               r%   rA   z"ResizeKeepRatioToSequence.__init__   s~    8 	$ 0"4W~H$!6D!3M!BD!2"4!2"4#6 "r'   c                 N   t        j                  |       dd \  }}t        ||f||||
      \  }}|\  }}||z  }||z  }t        ||      |z  t	        ||      d|z
  z  z   }|dkD  rRt        j
                         |k  r;t        j                  |d   |d         }|rdt        j                  |      z  }||f}nd}|dkD  rt        j
                         |k  rzt        j                  |	d         t        j                  |	d         f}t        j                  t        j                  |       }t        j                  |      }|d   |z  |d   |z  f}t        ||f|      D cg c]  \  }}t        ||z  |z         }}}t        |t              r||}}n|\  }}t        |d   |      |d<   t        |d   |      |d<   |rB|t        j                  |d   |z        z  |d<   |t        j                  |d   |z        z  |d<   |d   |z  }|d   |z  }||z  }||kD  rt        j                  ||z        }t        |d   |z        |d<   t        |d   |z        |d<   |rB|t        j                  |d   |z        z  |d<   |t        j                  |d   |z        z  |d<   |S c c}}w )zGet parameters for resizing.r   N      ?r   )ri   ri   )FrI   r8   r   r3   r?   uniformr   sqrtlogexpziproundr0   r   r   ) rF   r   ra   r   rb   rc   rd   re   rf   rg   r   img_himg_wrN   r7   target_htarget_wratio_hratio_wr    ratio_factor
log_aspectaspect_factordimfsizephpwr,   r-   r.   
scale_backs                                    r%   
get_paramsz$ResizeKeepRatioToSequence.get_params   s     '',QR0u .EN
9 '( U"U"GW%/#gw2G2PW<2XX q V]]_7H%H!>>*<Q*?ASTUAVWL !DIIl$;;(,7L#L !fmmo8J&J((#6q#9:DHHEXYZE[<\]J HHV^^Z%@AM IIm4M(Om;\!_}=\]L 69%5VW63cEkAo&WW j#&BFB d1gr"Qd1gr"Q 499T!Wr\22DG499T!Wr\22DG Q2Q2-/%%#3g#=>J$q'J./DG$q'J./DG "tyya266Qtyya266QC Xs   !J!c                    | j                  || j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  | j                        }t        | j                  t        t        f      r t        j                   | j                        }n| j                  }t#        j$                  |||      S )zb
        Resize the image with aspect ratio preservation and sequence length constraints.
        )r   r   ra   r   rb   rc   rd   re   rf   rg   r   r0   r=   rJ   rK   r?   rL   rj   rM   )rD   rF   r|   r=   s       r%   rR   z!ResizeKeepRatioToSequence.forwardK  s     OO!!##LL""##""##$$NN
 d((5$-8"MM$*<*<=M ..MxxT=11r'   c                 *   t        | j                  t        t        f      rdnt	        | j                        }| j
                  j                   d| j                   d| j                   d| j                  dd| j                  dd| j                  ddS )	Nr?   (patch_size=, max_sequence_len=z
, longest=z.3fz, random_scale_prob=z, random_aspect_prob=))r0   r=   rJ   rK   rB   rE   rT   r   ra   rb   rc   rf   )rD   interpolate_strs     r%   __repr__z"ResizeKeepRatioToSequence.__repr__d  s    &01C1CeT]&S(Y\]a]o]oYp>>**+<7H I$$($9$9#: ;<<, -%%)%;%;C$@ A&&*&=&=c%B!	E 	Fr'   )   rS   T        r9   r   )g333333?g?Fr   g?g(\?N)r   )ri   gHzG?Fr   r   N)
rT   rU   rV   rW   rA   staticmethodr   rR   r   r\   r]   s   @r%   r_   r_      sg     !#$ +#! +,#\  !*#! +U Un22Fr'   r_   c                   \     e Zd ZdZ	 	 	 d	dedededeeeeeef   f   def
 fdZ	d Z
 xZS )
CenterCropToSequencezVCenter crop the image such that the resulting patch sequence length meets constraints.r   r   r   fillpadding_modec                 h    t         |           || _        || _        || _        || _        || _        y N)r@   rA   r   r   r   r   r   )rD   r   r   r   r   r   rE   s         r%   rA   zCenterCropToSequence.__init__o  s6     	$&"4	(r'   c                     t         j                  j                  |      \  }}}t        ||f| j                  | j
                  | j                        \  }}t        ||| j                  | j                        S )zKCenter crop the image to maintain aspect ratio and fit sequence constraint.)r   r   )
r
   r   rI   r8   r   r   r   r   r   r   )rD   rF   rN   rO   rP   r7   s         r%   rR   zCenterCropToSequence.forward  si    ''66s;1a-FOO##	
9 "#ytyytO`O`aar'   Tr   constant)rT   rU   rV   rW   r   rX   r   r   rB   rA   rR   r\   r]   s   @r%   r   r   m  sa    `
 (,56 *)) ) !%	)
 U3S=112) ) br'   r   c                   x     e Zd ZdZ	 	 	 ddedededeeeeeef   f   def
 fdZ	e
d        Zd	 Zd
efdZ xZS )RandomCropToSequencea,  Randomly crop and/or pad the image to fit sequence length constraints.

    This maintains aspect ratio while ensuring the resulting image, when divided into patches,
    will not exceed the specified maximum sequence length. Similar to CentralCropToSequence
    but with randomized positioning.
    r   ra   r   r   r   c                 h    t         |           || _        || _        || _        || _        || _        y)a  
        Args:
            patch_size: Size of patches (int or tuple of (patch_h, patch_w))
            max_sequence_len: Maximum allowed sequence length for the resulting image
            divisible_by_patch: If True, resulting image dimensions will be multiples of patch_size
            fill: Fill value for padding
            padding_mode: Padding mode ('constant', 'edge', 'reflect', 'symmetric')
        N)r@   rA   r   ra   r   r   r   )rD   r   ra   r   r   r   rE   s         r%   rA   zRandomCropToSequence.__init__  s6      	$ 0"4	(r'   c           
         t         j                  j                  |       \  }}}||d   z
  }||d   z
  }|dk(  rd}n<t        t	        j
                  t        j                  dt        |            |            }|dk(  rd}||fS t        t	        j
                  t        j                  dt        |            |            }||fS )z!Get random position for crop/pad.r   r   )	r
   r   rI   r   r   copysignr?   randintabs)	rF   target_sizerN   image_heightimage_widthdelta_heightdelta_widthtoplefts	            r%   r   zRandomCropToSequence.get_params  s     (2'<'<'K'KC'P$<#k!n4!KN2 1CdmmFNN1c,6G$H,WXC!D Dy t}}V^^As;7G%H+VWDDyr'   c           	      0   t         j                  j                  |      \  }}}t        ||f| j                  | j
                  | j                  d      \  }}| j                  ||      \  }}t        ||||d   |d   | j                  | j                        S )zTRandomly crop or pad the image to maintain aspect ratio and fit sequence constraint.ri   )r   r   r   )r   r   heightwidthr   r   )r
   r   rI   r8   r   ra   r   r   r   r   r   )rD   rF   rN   rq   rr   r7   r   r   s           r%   rR   zRandomCropToSequence.forward  s     %//>>sC5% .ENOO!!##
9 OOC3	T Q<A,**
 	
r'   r   c                     | j                   j                   d| j                   d| j                   d| j                   dS )Nr   r   , divisible_by_patch=r   )rE   rT   r   ra   r   )rD   s    r%   r   zRandomCropToSequence.__repr__  sN    >>**+<7H I$$($9$9#: ;&&*&=&=%>aA 	Br'   r   )rT   rU   rV   rW   r   rX   r   r   rB   rA   r   r   rR   r   r\   r]   s   @r%   r   r     s     (,56 *)) ") !%	)
 U3S=112) ).  &
:B# Br'   r   c                     t        | t              rt        |       |k7  rt        | d| d      | d   | d   kD  r0t	        j
                  |j                          d       | d   | d   fS | S )Nz  should be a sequence of length .r   r   z range reversed. Swapping.)r0   r   r1   r2   warningswarn
capitalize)valuenamelengths      r%   _validate_ranger     sv    eX&#e**>D6!A&KLL Qx%(*++EFGQxq!!Lr'   c                       e Zd ZdZ	 	 	 	 	 	 	 	 	 ddeeeeef   f   ded	eeef   d
eeef   deee	f   de
dee   deeeef      def fdZedddddddefdej                   d	eeef   d
eeef   dedededede
dee   deeeef      deee	   e	f   deeeeeef   eeef   e	f   fd       Zdej                   dej                   fdZdefdZ xZS )RandomResizedCropToSequencea	  
    Randomly crop the input image to a subregion with varying area and aspect ratio
    (relative to the original), then resize that crop to a target size. The target size
    is determined such that patchifying the resized image (with `patch_size`)
    does not exceed `max_seq_len` patches, while maintaining the aspect ratio of the crop.

    This combines aspects of torchvision's RandomResizedCrop with sequence length constraints.

    Args:
        patch_size (int or tuple[int, int]):
            Patch dimensions (patch_h, patch_w) for sequence length calculation.
        max_seq_len (int):
            Maximum number of patches allowed in the final image.
        scale (tuple[float, float]):
            Range (min, max) of area fraction of the original image to crop.
        ratio (tuple[float, float]):
            Range (min, max) of aspect ratio *multipliers* for the crop, relative
            to the original image's aspect ratio. E.g., (0.75, 1.333) means the
            crop's aspect ratio will be sampled between 0.75*orig_ar and 1.333*orig_ar.
            Uses log-uniform sampling.
        interpolation (str or InterpolationMode):
            Interpolation mode for resizing. Can be 'bilinear', 'bicubic', 'nearest',
            or 'random' (chooses between bilinear and bicubic).
            Defaults to 'bicubic'.
        divisible_by_patch (bool):
            If True, the final image height and width will be multiples of the
            respective patch dimensions. Defaults to True.
        max_ratio (float, optional):
            An optional upper limit on the scaling ratio applied during resizing.
            Prevents excessive upsampling of the initial crop. `max_ratio=1.0`
            prevents any upsampling beyond the cropped size. Defaults to None (no limit).
        final_scale_range (tuple[float, float], optional):
            If provided, applies an *additional* random scaling factor to the
            final target size. The factor is sampled uniformly from this range,
            and multiplied by the size determined by `get_image_size_for_seq`.
            E.g., (0.8, 1.0) means the final size will be between 80% and 100%
            of the maximum feasible size. Defaults to None (use maximum feasible size).
        attempts (int):
            Number of attempts to sample a valid crop geometry before falling back
            to a center crop strategy. Defaults to 10.
    r   rS   TN
   r   r   scaler    r=   r   r   final_scale_rangeattemptsc
                    t         
|           t        |t              r||c| _        | _        n(t        |      dk7  rt        d      |\  | _        | _        || _        || _	        || _
        || _        || _        || _        |	| _        t        |t              r"|dk(  rt         | _        nt%        |      | _        n|| _        t'        | j                  d      | _	        t'        | j                  d      | _
        | j                  _t'        | j                  d      | _        d| j                  d   cxk  r| j                  d	   cxk  rd
k  sn t)        j*                  d       y y y )Nr   r   r?   r   r    r   r   r   r   ri   z?final_scale_range values should ideally be between 0.0 and 1.0.)r@   rA   r0   r   r#   r$   r1   r2   r   r   r    r   r   r   r   rB   rC   r=   r   r   r   r   )rD   r   r   r   r    r=   r   r   r   r   rE   s             r%   rA   z$RandomResizedCropToSequence.__init__  sB    	j#&)3Z&DL$, :!# !fgg)3&DL$,&

"4"!2 mS)(%:"%7%F"!.D %TZZ9
$TZZ9
 !!-%4T5K5KM`%aD" 411!4X8N8Nq8QXUXX_` Y	 .r'   rF   crop_attemptsr#   r$   r   c                    t        j                  |       \  }}}|dk  s|dk  rt        d| d|       ||z  }||z  }t        j                  |d         t        j                  |d         f}t        |      D ]  }|t        j                  |d   |d         z  }t        j                  t        j                  |d   |d               }||z  }t        t        t        j                  ||z                    }t        t        t        j                  ||z                    }d|cxk  r|k  sn d|cxk  r|k  sn t        j                  d||z
        }t        j                  d||z
        } n ||d   z  }||d   z  }||k  r$|}t        t        t        ||z              |      }n-||kD  r$|}t        t        t        ||z              |      }n|}|}t        d|      }t        d|      }||z
  dz  }||z
  dz  }t        ||f||f|||      \  }}|}|	|	\  }}t        j                  ||      }t        t        |d      d      }||z  |z  } ||z  |z  }!|r7|t        j                   | |z        z  }"|t        j                   |!|z        z  }#n(t        t        |             }"t        t        |!            }#t        |"|      }"t        |#|      }#|"|#f}|d   |z  }$|d   |z  }%|$|%z  |kD  r%|}t#        j$                  d	|d
d| d| d| d	       t'        |
t(        t*        f      rt        j,                  |
      }
n|
}
||||f||
fS )zP Get parameters for a random sized crop relative to image aspect ratio.
        r   z1Input image must have positive dimensions, got H=z, W=r   r   )r   r   r   r   r   ri   zFinal scale randomization (z.2fz) resulted in size z exceeding max_seq_len=z, after rounding. Reverting to feasible size r   )rj   rI   r2   r   rm   ranger?   rk   rn   r   rp   rl   r   r3   r   r8   r   r   r   r0   rJ   rK   rL   )&rF   r   r    r   r#   r$   r   r   r   r   r=   rN   r   r   areaorig_aspect	log_ratiotarget_areaaspect_ratio_factoraspect_ratiocrop_hcrop_wr   r   min_aspect_ratiomax_aspect_ratiofeasible_ratiofeasible_size
final_sizemin_scmax_scscale_factorraw_hraw_wrs   rt   r,   r-   s&                                         r%   r   z&RandomResizedCropToSequence.get_paramsJ  s     ++C065Q;%1* QRXQYY]^c]deff~fnXXeAh'%();<	}% '	)Aa%(!CCK"&((6>>)A,	RS+U"V&)<<L tyy|)CDEFFtyy|)CDEFF6"U"q6';V';nnQ8~~a8'	)$  +U1X5*U1X5--Sv0@'@!ABFK//Sv0@'@!ABEJ  F^FF^FF?q(CFNq(D )?V)#1)
% #
(.NFF!>>&&9Ls<5s;L ^+l:E^+l:E ""TYYuw%??"TYYuw%??uU|,uU|, 8W-H8W-H"H-J 'qMW4M&qMW4M-<+!<\#<NNablam  nE  FQ  ER  R~  L  ~M  MN   O  P meT]3"MM-8M)MT66*JEEr'   c                 @   | j                  || j                  | j                  | j                  | j                  | j
                  | j                  | j                  | j                  | j                  
      \  }}}|\  }}}}t        j                  |||||||d      }	|	S )N)	r   r    r   r#   r$   r   r   r   r=   T)r   r   r   r   r|   r=   rH   )r   r   r    r   r#   r$   r   r   r   r=   rj   resized_crop)
rD   rF   crop_paramsr   r=   r   r   r   r   outputs
             r%   rR   z#RandomResizedCropToSequence.forward  s    15****--LLLL#66(("44,, 2A 2
.Z %0!T66'	
 r'   c                 @   t        | j                  t        t        f      r#dj	                  d | j                  D              }nt        | j                        }| j                  j                  dz   }|d| j                   d| j                   dz  }|d| j                   z  }|d| j                   z  }|d| j                   z  }|d	| d
z  }|d| j                   z  }|d| j                   z  }|d| j                   z  }|d| j                    z  }|dz  }|S )Nz, c              3   V   K   | ]!  }t        |      j                  d       d    # yw)r   N)rB   split).0ms     r%   	<genexpr>z7RandomResizedCropToSequence.__repr__.<locals>.<genexpr>  s"     'Z!AS(9"(='Zs   ')(zpatch_size=(r   z, max_seq_len=z, scale=z, ratio=z, interpolation=[]r   z, max_ratio=z, final_scale_range=z, attempts=)r0   r=   rJ   rK   joinrB   rE   rT   r#   r$   r   r   r    r   r   r   r   )rD   r   format_strings      r%   r   z$RandomResizedCropToSequence.__repr__  s9   d((5$-8"ii'ZtGYGY'ZZO!$"4"45O//#5<~R~QGG>$*:*:);<<8DJJ<008DJJ<00,_,=Q??01H1H0IJJ<'788/0F0F/GHH;t}}o66r'   )	r   rS   )g{Gz?ri   )g?g      ?r:   TNNr   )rT   rU   rV   rW   r   r   r   rY   rB   r   rX   r   rA   r   rC   rZ   r[   r   r   rR   r   r\   r]   s   @r%   r   r     s   (X 35%0%/7@#'%);?-a#uS#X./-a -a UE\"	-a
 UE\"-a S"334-a !-a E?-a $E%,$78-a -a^ 
 "$#'+)-?COdrFrF&rF &rF 	rF
 rF rF rF !%rF  rF  (eUl(;<rF !&7!8:K!KLrF 
uS#sC'(%S/;LL	MrF rFh5<< ELL :# r'   r   rF   padinclude_infoflatten_patchesc                 
   | j                   \  }}}|\  }}	|rd||z  dk7  s||	z  dk7  rT|||z  z
  |z  }
|	||	z  z
  |	z  }t        j                  j                  j	                  | d|d|
f      } | j                   \  }}}||z  ||	z  }}| j                  |||||	      j                  ddddd      }|r|j                  d||	z  |z        n|j                  d||	|      }|rt        j                  t        j                  |      t        j                  |      d      \  }}t        j                  |j                  d      |j                  d      gd	      }t        j                  ||z  t        j                  
      }|||fS |S )Nr   r      r      r   ij)indexing)rz   )dtype)shaperZ   nnr   r   viewpermutereshapemeshgridarangestackonesrX   )rF   r   r   r   r   crO   rP   r}   r~   pad_hpad_wnhnwpatchesy_idxx_idxcoordvalids                      r%   patchify_imager     su    iiGAq!FB B!q2v{a"f"a"f"hh!!%%cAua+?@))1a "Wa2gBhhq"b"b)11!Q1a@G2Agoob"r'A+.wWY[]_acdGeG~~ell2&6R8HSWXuU]]2.b0ABJ

27%**5u$$Nr'   c                   J     e Zd ZdZ	 ddeeeeef   f   def fdZd Z	 xZ
S )PatchifyzSTransform an image into patches with corresponding coordinates and type indicators.r   r   c                 f    t         |           t        |t              r|n||f| _        || _        y r   )r@   rA   r0   rJ   r   r   )rD   r   r   rE   s      r%   rA   zPatchify.__init__  s0    
 	(2:u(E*JXbKc.r'   c                     t        |t        j                        rt        j                  j	                  |      }t        || j                  | j                        \  }}}|||dS )a  
        Args:
            img: A PIL Image or tensor of shape [C, H, W]

        Returns:
            A dictionary containing:
                - patches: Tensor of shape [N, P*P*C] if flatten_patches=True,
                          or [N, Ph, Pw, C] if flatten_patches=False
                - patch_coord: Tensor of shape [N, 2] with (y, x) coordinates
                - patch_valid: Valid indicator (all 1s for non-padding patches)
        )r   )r   patch_coordpatch_valid)r0   r	   r
   r   	to_tensorr   r   r   )rD   rF   r   r   r   s        r%   rR   zPatchify.forward  s]     c5;;'''11#6C .sDOOUYUiUi j   
 	
r'   )T)rT   rU   rV   rW   r   r   r   rX   rA   rR   r\   r]   s   @r%   r   r     s9    ]
 %)/c5c?23/ "/
r'   r   )r   rS   TNgh㈵>)r   )TTT)(rW   r   r?   r   typingr   r   r   r   r   r   rZ   PILr	   torchvisionr
   torchvision.transformsr   rj   !torchvision.transforms.functionalr   r   r   r   r   rX   rY   r8   rC   r   Moduler<   r_   r   r   r   r   r[   r   r   r)   r'   r%   <module>r     s  	    ? ?   " 2 ? K K
 35#'%)^S/^#uS#X./^ ^ !	^
 E?^ ^ 5%S/!"^B ,J79KI9VW ?uxx ?DjF jFZb588?? b@SB588?? SBl
|%((// |D ! $!\\!#s(O! ! 	!
 ! 5<<u||U\\5<<GHHI!H"
uxx "
r'   