
    kh_                     X   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZmZmZ ddlZddlmZmZmZ ddlmZ ddlmZ dd	lmZ 	 	 	 dd
ededee   dededefdZ G d d      Zdeeee
eef   f      dee	e      dee	e      de
e	e
eef      e	e   ef   fdZ  G d de      Z!y)a   Dynamic Sequence Length Datasets for Variable Resolution Image Processing

Implements two dataset wrappers:
1. NaFlexMapDatasetWrapper - Map-style dataset that returns batches with variable sequence lengths
TODO: 2. NaFlexIterableDatasetWrapper - Iterable dataset that yields batches with variable sequence lengths

Both support:
- Pre-initialized transforms for efficiency
- Distributed training
- Multiple workers
- Variable batch sizes based on sequence length

Hacked together by / Copyright 2025, Ross Wightman, Hugging Face
    N)partial)AnyIteratorListTupleDictOptionalUnionCallable)DatasetIterableDataset
DataLoader)Image   )Patchify)	to_2tupletokens_per_batchseq_lenmax_sizedivisorroundingreturnc                    | |z  }|dkD  rT|dk(  rt        j                  ||z        |z  }n>|dk(  rt        j                  ||z        |z  }nt        ||z        |z  }nt	        |      }t        d|      }|t        ||      }|S )a  Calculate batch size based on sequence length with divisibility constraints.

    Args:
        tokens_per_batch: Target number of tokens per batch.
        seq_len: Sequence length for this batch.
        max_size: Optional maximum batch size.
        divisor: Ensure batch size is divisible by this value.
        rounding: Rounding method ('floor', 'ceil', 'round').

    Returns:
        Calculated batch size.
    r   floorceil)mathr   r   roundintmaxmin)r   r   r   r   r   raw_batch_size
batch_sizes          T/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/data/naflex_dataset.pycalculate_naflex_batch_sizer$      s    ( &/N {wNW$<=GJ>G#;<wFJ~787BJ (
 Q
#JX.
    c                       e Zd ZdZ	 ddee   ddfdZdeee	e
ej                  f   eeej                  f   f      dee	e
ej                  f   ej                  f   fdZy)	NaFlexCollatorzECustom collator for batching NaFlex-style variable-resolution images.Nmax_seq_lenr   c                     |xs d| _         y)zqInitialize NaFlexCollator.

        Args:
            max_seq_len: Maximum sequence length for batching.
        @  N)r(   )selfr(   s     r#   __init__zNaFlexCollator.__init__L   s     '-#r%   batchc                 (   t        |d   t              sJ t        |      }|D cg c]  }|d   	 }}t        |d   t        j                        rt        j
                  |      }n%t        j                  |t        j                        }|D cg c]  }|d   	 }}| j                  | j                  }nt        d |D              }|d   d   }|j                  dk(  }|r<|j                  \  }	}
}}t        j                  |||
||ft        j                        }n7|j                  d   }t        j                  |||ft        j                        }t        j                  ||dft        j                        }t        j                  ||ft        j                        }t        |      D ]N  \  }}t!        |d   j                  d   |      }|d   d| ||d|f<   |d	   d| ||d|f<   |d
   d| ||d|f<   P ||||d}||fS c c}w c c}w )ao  Collate batch of NaFlex samples.

        Args:
            batch: List of tuples (patch_dict, target).

        Returns:
            A tuple of (input_dict, targets) where input_dict contains:
                - patches: Padded tensor of patches
                - patch_coord: Coordinates for each patch (y, x)
                - patch_valid: Valid indicators
        r   r   )dtypeNc              3   @   K   | ]  }|d    j                   d     yw)patchesr   N)shape).0items     r#   	<genexpr>z*NaFlexCollator.__call__.<locals>.<genexpr>u   s     O4d9o33A6Os   r1         patch_coordpatch_valid)r1   r8   r9   r   )
isinstancetuplelentorchTensorstacktensorint64r(   r   ndimr2   zerosfloat32bool	enumerater    )r+   r-   r"   r4   targetspatch_dictsmax_patchespatches_tensoris_unflattened_phpwcr1   	patch_dimr8   r9   i
patch_dictnum_patchesresults                        r#   __call__zNaFlexCollator.__call__W   s(    %(E***Z
 (--t47--gaj%,,/kk'*Gll7%++>G ,114tAw11 '**K O;OOK %Q	2',,1)//LAr2qkk:{BA"Femm\G ',,Q/Ikk:{I"Femm\G kk:{A">ekkRkk:{";5::N '{3 	SMAzj399!<kJK'1)'<\k'JGA||O$+5m+D\k+RK<K<(+5m+D\k+RK<K<(	S &&"	
 wa . 2s   H
H)N)__name__
__module____qualname____doc__r	   r   r,   r   r   r   strr=   r>   r
   rU    r%   r#   r'   r'   I   s    O *.	.!#	. 
	.@d5c5<<.?)@%U\\HYBZ)Z#[\ @afgkloqvq}q}l}g~  AF  AM  AM  hM  bN @r%   r'   
patch_sizepatch_size_choicespatch_size_choice_probsc                    | |d} | du |du k(  rt        d      | t        |       g}dg}d}n|D cg c]  }t        |       }}|dt        |      z  gt        |      z  }nXt        |      t        |      k7  rt        d      t        t	        |            }|dk  rt        d      |D cg c]  }||z  	 }}d	}|||fS c c}w c c}w )
a  Resolve patch size configuration.

    Args:
        patch_size: Single patch size to use.
        patch_size_choices: List of patch sizes to choose from.
        patch_size_choice_probs: Probabilities for each patch size choice.

    Returns:
        Tuple of (sizes, probs, variable) where sizes is list of patch size tuples,
        probs is list of probabilities, and variable indicates if patch size varies.
    N   z<Specify exactly one of `patch_size` or `patch_size_choices`.g      ?Fz*`patch_size_choice_probs` length mismatch.r   z&`patch_size_choice_probs` sum to zero.T)
ValueErrorr   r<   floatsum)r\   r]   r^   sizesprobsvariablepss           r#   _resolve_patch_cfgri      s   " 08
d 2d :;J
 	
 :&''9:!1::"*3u:%&U3E*+s5z9 !MNNc123AAv !IJJ$;<qQU<E<%!! ; =s   C/C	c            !       F    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedeeeeeef   f      dee	e      dee	e
      deedf   d	ed
ee   dee   dedededededededdf  fdZd ZdefdZdeddfdZdefdZdeeeeej,                  f   ej,                  f      fdZ xZS )NaFlexMapDatasetWrappera  
    IterableDataset wrapper for a map-style base dataset.

    Yields batches with variable sequence lengths. It calculates a canonical
    batch schedule (sequence length, batch size pairs) once based on the
    total dataset size (padded for distribution). Each epoch, it shuffles
    the order of this canonical schedule and the dataset indices.
    This ensures a consistent number of batches and samples per epoch
    across all ranks. Handles distributed training and multiple workers.
    Nbase_datasetr\   r]   r^   seq_lens.max_tokens_per_batchtransform_factorymixup_fnseedshuffledistributedrank
world_sizeepochbatch_divisorr   c                    t         |           t        |d      rt        |d      st        d      || _        t        t        t        |                  | _        || _	        |	| _
        |
| _        || _        |r|nd| _        |r|nd| _        || _        || _        t#        |||      \  | _        | _        | _        i | _        i | _        g | _        | j                  D ]  }t1        |      | j,                  |<    t3        | j$                        D ]t  \  }}| j.                  j5                  t7        || j(                                | j                  D ]/  }||f}|r |||      | j*                  |<   !d| j*                  |<   1 v || _        g | _        d| _        d| _        | jA                          g | _!        | jE                  | j                         y)	a  Initialize NaFlexMapDatasetWrapper.

        Args:
            base_dataset: Map-style dataset to wrap.
            patch_size: Single patch size to use.
            patch_size_choices: List of patch sizes to randomly select from.
            patch_size_choice_probs: Probabilities for each patch size.
            seq_lens: Sequence lengths to use for batching.
            max_tokens_per_batch: Target tokens per batch.
            transform_factory: Factory function for creating transforms.
            mixup_fn: Optional mixup function.
            seed: Random seed.
            shuffle: Whether to shuffle data.
            distributed: Whether using distributed training.
            rank: Process rank for distributed training.
            world_size: Total number of processes.
            epoch: Starting epoch.
            batch_divisor: Ensure batch size is divisible by this.
        __len____getitem__zLbase_dataset must be a map-style dataset (implement __len__ and __getitem__)r   r   )r\   flatten_patches)r(   r\   N)#superr,   hasattr	TypeErrorrl   sortedlistsetrm   rn   rq   rr   rs   rt   ru   rv   rw   ri   patch_sizespatch_size_probsvariable_patch_size
transformscollate_fnspatchifiersr'   rF   appendr   rp   _canonical_batch_schedule_num_batches_per_rank_padded_samples_per_rank_create_canonical_schedule_epoch_batches_prepare_epoch_batches)r+   rl   r\   r]   r^   rm   rn   ro   rp   rq   rr   rs   rt   ru   rv   rw   r   	patch_idxpatch_size_tuplekey	__class__s                       r#   r,   z NaFlexMapDatasetWrapper.__init__   s   J 	|Y/w|]7[jkk(tCM23$8!	&'DQ	(3*
* M_#M
I$/1I FH02+-}} 	@G(6w(?DW%	@ ,5T5E5E+F 	0'I'##H+$($<$< <%   == 0	*$+<]m+nDOOC(+/DOOC(0	0 ! AC&*+"-.%'') <>##DJJ/r%   c           
         t        | j                        }|}|}| j                  r| j                  dkD  r|| j                  z  dk7  rA| j                  || j                  z  z
  }||z  }t	        d| j
                   d| d| d       nd}|| j                  z  dk7  rt        d| d| j                         || j                  z  }n| j                  r| j                  dk  r	 || _        |dk(  rg | _        d| _	        y	t        j                         }|j                  | j                         g }|}d}|dkD  rt        j                  dt        | j                        d
|      j!                         }	| j                  |	   }
t#        | j$                  |
|| j&                  d      }t)        d|      }t+        ||      }|dk  rt-        j.                  d|
 d| d       n#|j1                  |
|f       ||z  }||z  }|dkD  r||k7  r+t-        j.                  d| j
                   d| d| d|        || _        t        |      | _	        t	        d| j
                   d| j                   d| j                   d       y	)a%  
        Calculates the canonical batch schedule (seq_len, batch_size pairs)
        based on the dataset size, padded for distributed training.
        This schedule is the *same* for all ranks and ensures consistent
        epoch length. It is calculated once during initialization.
        r   r   Rank z: Padding dataset with z. samples for distributed training (total size z).z$Internal Error: Padded total length z not divisible by world size N)r   	generatorr   )r   r   r   r   r   z$Calculated batch size <= 0 (seq_len=z, remaining=z&). Stopping schedule generation early.z": Canonical schedule accounts for z samples, but expected z samples per rank. This might happen if min_batch_size or batch_divisor constraints prevent utilizing all samples. Check parameters. Remaining samples: z": Created canonical schedule with z batches for z samples/rank.)r<   rl   rs   ru   printrt   RuntimeErrorr   r   r   r=   	Generatormanual_seedrq   randintrm   r4   r$   rn   rw   r   r    warningswarnr   )r+   	total_lenpadded_total_lennum_samples_per_rankpad_sizegcurrent_scheduleremaining_samplestotal_scheduled_samplesseq_idxr   r"   s               r#   r   z2NaFlexMapDatasetWrapper._create_canonical_schedule1  s    ))*	$(! 34??*a/ OOy4??/JK!X-!tyyk)@
Jx  zJ  yK  KM  N  O$//1Q6#&JK[J\\yz~  {J  {J  zK  %L  M  M#3t#F $//Q"6(<%1$.0T+*+T' OO	dii 240"#!#mmAs4=='941MRRTGmmG,G 5!%!:!:*** J Q
+JZ):;JQ!EgYl[lZm  nT   U  V##Wj$9:+#z1#1  !#6 #&::MM		{"DE\D] ^  45 688I7JL *:&%()9%:"dii[ B4C]C]B^^klp  mJ  mJ  lK  KY  Z  	[r%   c                    t        j                         }|j                  | j                  |z          t	        | j
                        }| j                  r&t        j                  ||      j                         }nt        t        |            }|}| j                  rb| j                  dkD  rS| j                  | j                  z  }||kD  r||z
  }||d| z   }t	        |      |k7  rt        dt	        |       d|       | j                  r,| j                  dkD  r|| j                  d| j                     }n|}t	        |      | j                  k7  rat!        j"                  d| j                   dt	        |       d| j                   d	       t%        t	        |      | j                        }	|d|	 }n| j                  }	| j                  rLt        j                  | j&                  |      j                         }
|
D cg c]  }| j(                  |    }}nt        | j(                        }g | _        d
}d
}|D ]x  \  }}t%        ||	|z
        }|d
k  r0||	k  r)t!        j"                  d| j                   d| d|	 d        n1||||z    }| j*                  j-                  ||f       ||z  }||z  }z ||	k7  r0t!        j"                  d| j                   d| d|	 d|	|z
   d	       yyc c}w )a  
        Prepares the batches for the current epoch by:
        1. Shuffling the full dataset indices (using epoch seed).
        2. Applying padding if in distributed mode.
        3. Selecting indices for the current rank.
        4. Shuffling the *order* of the canonical batch schedule (using epoch seed).
        5. Assigning the rank's indices to the shuffled batches.
        r   r   Nz)Internal Error: Padded index list length z does not match expected r   z#: Number of indices for this rank (z3) does not match expected padded samples per rank (z*). Epoch generation might be inconsistent.r   z: Ran out of samples (/z?) before processing entire schedule. Check schedule generation.z: Assigned z" samples to batches, but expected z2 effective samples this epoch. Indices remaining: .)r=   r   r   rq   r<   rl   rr   randpermtolistr   rangers   ru   r   r   rt   r   r   r    r   r   r   r   )r+   rv   r   r   all_indices_shuffledindices_for_ranksr   r   indices_this_rankeffective_samples_this_rankschedule_permrQ   shuffled_scheduleidx_posscheduled_samples_countr   bs	actual_bsbatch_indicess                      r#   r   z.NaFlexMapDatasetWrapper._prepare_epoch_batches  sE    OO	dii%'( ))*	<<#(>>)q#I#P#P#R #'i(8#9  1! 3#<<tN)++i7$8;OPYQY;Z$Z!$%)99#&OPSTePfOg  hA  BR  AS  %T  U  U ! 3 1$))2LT__2L M 1  !T%B%BB]]#FsK\G]F^ _EEIEbEbDc d;< ,/s3D/EtGdGd+e(!23O4O!P ,0+H+H( <<!NN4+E+EQRSZZ\MLY Zq!?!?!B Z Z $T%C%C D !"#, 	1KGRB ;g EFIA~+.II]]U499+5KG9TUVqUr  sr  $s  t-g)8KLM&&'?@y G#y0#	1 #&AA]]		{+.E-F G  ;< =&&AD[&[%\\]_ B/ ![s   <K(c                 T    || j                   k7  r|| _         | j                  |       yy)zxUpdates the epoch, regenerating the epoch-specific batches.

        Args:
            epoch: New epoch number.
        N)rv   r   )r+   rv   s     r#   	set_epochz!NaFlexMapDatasetWrapper.set_epoch  s*     DJJDJ''. r%   c                     | j                   S )zReturns the number of batches per worker for the current epoch.

        Returns:
            Number of batches this worker will process.
        )r   )r+   s    r#   ry   zNaFlexMapDatasetWrapper.__len__  s     )))r%   c           
   #   *  K   t         j                  j                  j                         }|r|j                  nd}|r|j
                  nd}| j                  |d|   }|D ]T  \  }}|s
d}| j                  rAt        j                  t        j                  | j                        d      j                         }||f}| j                  j                  |      }	| j                  |   }
g }g }|D ]_  }	 | j                  |   \  }}|	r |	|      n|}|t!        j"                  d| d       >|j%                  |       |j%                  |       a | j*                  | j+                  ||      \  }}|D cg c]
  } |
|       }}t-        t/        ||            }|s> | j0                  |   |       W y# t&        $ r t!        j"                  d| d       Y t(        $ r'}t!        j"                  d| d	| d       Y d}~d}~ww xY wc c}w w)
zIterates through pre-calculated batches for the current epoch.

        Yields:
            Tuple of (input_dict, targets) for each batch.
        r   r   Nz"Transform returned None for index z. Skipping sample.z!IndexError encountered for index z= (possibly due to padding/repeated indices). Skipping sample.zError processing sample index z	. Error: )r=   utilsdataget_worker_infonum_workersidr   r   multinomialr@   r   r4   r   getr   rl   r   r   r   
IndexError	Exceptionrp   r   zipr   )r+   worker_infor   	worker_idbatches_for_workerr   indicesr   transform_key	transformbatch_patchifier
batch_imgsbatch_targetsidximglabelprocessed_imgebatch_sampless                      r#   __iter__z NaFlexMapDatasetWrapper.__iter__  s(     kk&&6681<k--!&1KNNq	 "001GK1GH 2 /	?GW I''!--ell4;P;P.QSTUZZ\	 %i0M++M:I#//	:JM !%!2!23!7JC 7@IcNSM$, (J3%Oa&bc %%m4!((/. }}(,0MM*m,T)
M;EFC*3/FJF Z!?@M/d&&w/>>_/	?@ " ]]%Fse  LI  $J  K  MM$B3%yQRPSSe"fg GsZ   C-H09F8)H*"F8'H3HHH8"HHH$H HHH)NNN)      r*   i  i   i @  NN*   TFr   r   r      )rV   rW   rX   rY   r   r	   r
   r   r   r   rb   r   rE   r,   r   r   r   ry   r   r   rZ   r=   r>   r   __classcell__)r   s   @r#   rk   rk      s   	 AE6:=A(B(048+/  %!"!]0!]0 !sE#s(O';!<=]0 !)c 3	]0
 &.d5k%:]0 CHo]0 #&]0  (1]0 x(]0 ]0 ]0 ]0 ]0 ]0 ]0  !]0" 
#]0~S[lTC Tl	/s 	/t 	/* *<?(5c5<<.?)@%,,)N#OP <?r%   rk   )Nr   r   )"rY   r   randomr   	functoolsr   typingr   r   r   r   r   r	   r
   r   r=   torch.utils.datar   r   r   PILr   naflex_transformsr   timm.layersr   r   rZ   r$   r'   rb   rE   ri   rk   r[   r%   r#   <module>r      s       N N N  A A  ' ! #'((( 3-( 	(
 ( 	(VN Nb)"U3c3h#789)"$T#Y/)" "*$u+!6)" 4c3h $u+t34	)"Xf?o f?r%   