
    khPB                        d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZ ddlZddlmZ ddlZddlmZ ddlmZmZmZ 	 ddl Z!dd	l"m#Z#m$Z$ dd
l%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+ ddl-m.Z. ddl/m0Z0 ddl1m2Z2  ejf                  e4      Z5 e6ejn                  jq                  dd            Z9 e6ejn                  jq                  dd            Z:d#dZ;e	 G d d             Z<de=defdZ>d Z?	 	 	 	 d$dZ@d ZAe! G d de!j                        ZCndZC G d d e      ZD G d! d"e0      ZEy# e,$ r dZ!dZ&Y w xY w)%zR Dataset reader for webdataset

Hacked together by / Copyright 2022 Ross Wightman
    N)	dataclass)partial)islice)AnyCallableDictListOptionalTuple)Image)DatasetIterableDatasetget_worker_info)_shufflegetfirst)expand_urls)base_plus_ext
url_openertar_file_expandervalid_sample   )load_class_map)Reader)SharedCountWDS_SHUFFLE_SIZEi    WDS_INITIAL_SIZEi   c                    t        |t              r|f}g }d}|D ]  }t        j                  j	                  | |      }	 |j                  |       t        j                  |      5 }|j                  d      rt        j                  |      }nt        j                  |      }d d d        c S  t        j                  d| d| d       i S # 1 sw Y   ,xY w# t        $ r}t        |      }Y d }~d }~ww xY w)N z.jsonzDataset info file not found at z	. Error: z.. Falling back to provided split and size arg.)
isinstancestrospathjoinappendwdsgopenendswithjsonloadyaml	safe_load	Exception_loggerwarning)	rootnamestriederr_strn	full_pathf	info_dictes	            X/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/data/readers/reader_wds.py
_load_infor9   *   s    %EG GGLLq)			LL#9% 2::g& $		!I $q 1I	2
  OO
)%	' C7 	78 I2 2  	!fG	s/   &C$=C!	CC	C	C6!C11C6c                   X    e Zd ZU eed<   ee   ed<   dZee   ed<   dZeed<   dZ	eed<   y)		SplitInfonum_samples	filenames shard_lengthsr   	alt_labelnameN)
__name__
__module____qualname__int__annotations__r   r    r?   r@   rA   r>       r8   r;   r;   B   s3    Sz "M5:"IsD#NrG   r;   splitinfoc           	         d }d| v sd| v r| j                  d      } d}d}t        |       dkD  rt        | d         }| d   } d| vr5| j                  d	d
      }t        |      dz
  }|rd|v r||   |d   v r||   }t        |       }|r|d   |   }|swt	        |d   |d         D 	
ci c]  \  }	}
|	|

 c}
}	t        fd|D              }t        j                               |d<   t        j                               |d<   ||d<    ||      }|S t        |||      }|S d|vs| |d   vr0t        d|  d|j                  di       j                          d      | } |d   |    } ||      }|S c c}
}	w )Nc           	      ~    t        | d   t        | d         t        | d         | j                  dd      | d         S )Nr<   r=   r?   r@   r   rA   )r<   r=   r?   r@   rA   )r;   tupleget)	dict_infos    r8   _info_convertz(_parse_split_info.<locals>._info_convertL   sG    !-0Ik23	/ :;mmK46"
 	
rG   tarz..|r   r   r   z::-   splitsr=   r?   c              3   (   K   | ]	  }|     y wNr>   ).0r5   _fcs     r8   	<genexpr>z$_parse_split_info.<locals>.<genexpr>i   s     !BQ#a&!Bs   r<   )rA   r<   r=   zsplit z not found in info ())rH   lenrE   r   zipsumrL   keysvaluesr;   RuntimeErrorrM   )rH   rI   rO   r<   
split_namesplit_parts	split_idxsplit_filenames
split_infor5   crX   s              @r8   _parse_split_inforg   K   s   
 ~ C 
u:>eAh-Kau++c1-KK(1,IX-+i2HDQYN2Z(3
%e,h
3J(+J{,CZP_E`(ab1q!tb!!B/!BB*/
*;
;'.3CJJL.A
?+,7
=)&z2J  #')J  45X#>w.B488HVXCYC^C^C`Baabcdd(^E*
":.
' cs   )E4c                 l    t         j                  dt        |        d       t        | t              r| y)zQCall in an exception handler to ignore exceptions, issue a warning, and continue.zHandling webdataset error (z). Ignoring.T)r-   r.   reprr   	TypeError)exns    r8   log_and_continuerl   ~   s.    OO1$s)LIJ#y!	rG   c                    |r,t        j                  | d         }t        ||         }|dk  ryt        | |         }t        | |      }t	        j
                  |      5 }t        j                  |      }|j                          ddd       |r|j                  |      }t        ||| j                  dd            }	|	S # 1 sw Y   <xY w)z Custom sample decode
    * decode and convert PIL Image
    * cls byte string label to int
    * pass through JSON byte string (if it exists) without parse
    r(   r   N)jpgclsr(   )r(   loadsrE   r   ioBytesIOr   openr)   convertdictrM   )
sample	image_key
image_mode
target_keyr@   metaclass_labelimgbdecodeds
             r8   _decoder      s     zz&.)$y/*?&,- 69
%C	C Ajjm
 kk*% s&**VT2JKGN s   &B??Cc                  n    t               } | | j                  S t        j                  j	                         S )z'get dataloader worker seed from pytorch)r   seedr%   utilspytorch_worker_seed)worker_infos    r8   r   r      s/    !#K99((**rG   c                   "    e Zd Z	 	 	 	 ddZd Zy)detshuffle2c                 <    || _         || _        || _        || _        y rV   )bufsizeinitialr   epoch)selfr   r   r   r   s        r8   __init__zdetshuffle2.__init__   s      #DL"DLDIDJrG   c                 l   t        | j                  t              r| j                  j                  }n!| xj                  dz  c_        | j                  }| j                  dk  rt               |z   }n| j                  |z   }t        j                  |      }t        || j                  | j                  |      S )Nr   r   )r   r   r   valuer   r   randomRandomr   r   r   )r   srcr   r   rngs        r8   runzdetshuffle2.run   s    $**k2

(( 

a


yy1}*,u4yy5(--%CCt||SAArG   N)i  d   r   )rB   rC   rD   r   r   r>   rG   r8   r   r      s     
		BrG   r   c                   F     e Zd ZdZej
                  dddf fd	Zd Z xZS )ResampledShards2z,An iterable dataset yielding a list of urls.NTr   c                 &   t         |           t        j                  j	                  |      }|| _        t        | j
                  d   t              sJ || _        t        j                         | _        |t        n|| _        || _        || _        y)zSample shards from the shard list with replacement.

        :param urls: a list of URLs as a Python list or brace notation string
        r   N)superr   r%   
shardlistsr   urlsr   r    nshardsr   r   r   r   worker_seeddeterministicr   )r   r   r   r   r   r   	__class__s         r8   r   zResampledShards2.__init__   sv     	~~))$/	$))A,,,,==?2=2E.;*
rG   c              #     K   t        | j                  t              r| j                  j                  }n!| xj                  dz  c_        | j                  }| j                  r+t        j                  | j                         |z         | _        t        | j                        D ]O  }| j                  j                  dt        | j                        dz
        }t        | j                  |          Q yw)z#Return an iterator over the shards.r   r   )urlN)r   r   r   r   r   r   r   r   r   ranger   randintr[   r   ru   )r   r   _indexs       r8   __iter__zResampledShards2.__iter__   s     djj+.JJ$$E JJ!OJJJE}}T%5%5%7%%?@DHt||$ 	-AHH$$QDII(:;E499U+,,	-s   C2C4)	rB   rC   rD   __doc__sysmaxsizer   r   __classcell__r   s   @r8   r   r      s"    6
 ,-rG   r   c            "            e Zd Z	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddedee   dededee   dededed	ee   d
edededededee   dee   f  fdZd Z		 ddee   fdZ
d Zd Zd Zd Zd ZddZddZ xZS )	ReaderWdsr/   rA   rH   is_trainingr<   
batch_sizerepeatsr   	class_map	input_keyinput_img_modery   target_img_modefilename_keysample_shuffle_sizesample_initial_sizec                    t         |           t        t        d      || _        || _        || _        || _        || _        d| _	        |xs t        | _        |xs t        | _        |
| _        || _        || _        || _        d| _        t'        | j                        | _        t+        || j(                        | _        ||| _        n| j,                  j.                  | _        |r| j.                  st        d      d| _        |	rt3        |	      | _        d| _        ni | _        d| _        d| _        t;        j<                         r]t;        j>                         rIt;        j@                         dkD  r2t;        jB                         | _        t;        j@                         | _        d | _"        d| _#        || _$        d| _%        d| _&        d| _'        d| _(        tS               | _*        d | _+        y )	NzcPlease install webdataset 0.2.x package `pip install git+https://github.com/webdataset/webdataset`.i  z.JPEGzBInvalid split definition, num_samples not specified in train mode.FTr   r   ),r   r   r%   r`   r/   r   r   r   common_seedshard_shuffle_sizeSAMPLE_SHUFFLE_SIZEr   SAMPLE_INITIAL_SIZEr   r   r   ry   r   key_extr9   rI   rg   re   r<   remap_classr   class_to_idx	dist_rankdist_num_replicasdistis_availableis_initializedget_world_sizeget_rankr   	worker_idr   num_workersglobal_worker_idglobal_num_workers
init_countr   epoch_countds)r   r/   rA   rH   r   r<   r   r   r   r   r   r   ry   r   r   r   r   r   s                    r8   r   zReaderWds.__init__  s   & 	;uw w	&$"%#6#M:M #6#M:M ",$(tyy)	+E499="*D#::Dt//!cee  .y 9D#D "D !"4#6#6#8T=P=P=RUV=V!]]_DN%)%8%8%:D"   !"#&= rG   c                 &    || j                   _        y rV   )r   r   )r   counts     r8   	set_epochzReaderWds.set_epochQ  s    !&rG   r   c                 n    | j                   y |&|| _        | j                  | j                  z  | _        y y rV   )r   r   r   r   )r   r   s     r8   set_loader_cfgzReaderWds.set_loader_cfgT  s=     77"*D&*&<&<t?O?O&OD# #rG   c                 `   | j                   t        j                  j                  j	                         }|:|| _         |j
                  | _        |j                  | _        |j                  | _	        | j                  | j                  z  | _        | j                  | j                  z  | j                  z   | _        | j                  j                  D cg c],  }t         j"                  j%                  | j&                  |      . }}t)        j*                  |      g}| j,                  r|j/                  t1        | j2                  | j4                  | j6                        | j8                  t)        j:                  t<              t)        j>                  | j@                  | jB                  tE        jF                  | j                              g       n5|j/                  | j8                  t)        j:                  t<              g       |j/                  t)        jH                  tK        tL        | jN                  | jP                  | j                  jR                        t<              t)        jT                  | jN                  | jV                        g       t)        jX                  | | _-        yc c}w )z8 Lazily initialize worker (in worker processes)
        N)r   r   )handler)r   r   r   )rw   rx   r@   )imagetarget).r   torchr   datar   idr   r   r   r   r   r   r   r   re   r=   r!   r"   r#   r/   r%   SimpleShardListr   extendr   r   r   r   _split_by_node_and_workertarfile_to_samplesrl   shuffler   r   r   r   mapr   r   r   r   r@   renamery   DataPipeliner   )r   r   r5   abs_shard_filenamespipelines        r8   
_lazy_initzReaderWds._lazy_init^  s    #++**::<K&#. !,#.#3#3 #.#:#: &*&<&<t?O?O&OD#$(NNT5E5E$E$VD! DH??C\C\]arww||DIIq9]]''(;<=OO++))**
 ..&&/?@ 44 44d&6&67   OO..&&/?@ 
 	GG"nn#22"oo77	 ) JJT^^DOOD
 	 ""H-K ^s   1J+c              #      K   | j                   dkD  r,t        || j                  d | j                         D ]  }|  y |D ]  }|  y wNr   )r   r   r   )r   r   ss      r8   r   z#ReaderWds._split_by_node_and_worker  sU     ""Q&C!6!6d>U>UV   s   AA
c                 H   | j                   t        | j                  | j                        z  }| j                  s| j                  dkD  rt        j                  |      }| j                  r/t        j                  || j                  z        | j                  z  }t        |      S r   )	r<   maxr   r   r   mathceilr   rE   )r   num_worker_sampless     r8   _num_samples_per_workerz!ReaderWds._num_samples_per_worker  s    !--D4K4KTMcMc0ddt559!%+=!>!%+=+O!PSWSbSb!b%&&rG   c              #   X  K   | j                   | j                          | j                         }| j                  s| j                  dkD  r| j                   j                  |      }n| j                   }d}|D ]0  }|d   }| j                  r| j                  |   }|d   |f |dz  }2 y w)Nr   r   r   r   )r   r   r   r   r   
with_epochr   r   )r   r   r   irv   r   s         r8   r   zReaderWds.__iter__  s     77?OO!99;t559
 ##$67BB 	FH%F**62/6))FA	s   B(B*c                 @    | j                         | j                  z  }|S rV   )r   r   )r   r<   s     r8   __len__zReaderWds.__len__  s!    224t7G7GGrG   c                     J d       )NzNot supportedr>   )r   r   basenameabsolutes       r8   	_filenamezReaderWds._filename  s    %o%urG   c                 4   | j                   | j                          g }| j                   D ]j  }| j                  |v r|| j                     }nd|v r|d   | j                  z   }nJ d       |j	                  |       t        |      | j                  k\  si |S  |S )z0 Return all filenames in dataset, overrides base__key__zNo supported name field present)r   r   r   r   r$   r[   r<   )r   r   r   r0   rv   rA   s         r8   r=   zReaderWds.filenames  s    77?OOgg 		F  F*d//0f$i(4<<7???uLL5zT---		 rG   )NtrainFNr   r   *   Nzjpg;png;webpRGBro   r   filenameNNrV   )FF)rB   rC   rD   r    r
   boolrE   ru   r   r   r   r   r   r   r   r   r   r=   r   r   s   @r8   r   r     s<    #'  %)-(,+"'##% *1515#HH 3-H 	H
 H "#H H H H  ~H H  H H !H H  "*#!H" "*##HT'
 *.P!#P3.j'0&rG   r   ))z
_info.jsonz	info.json)rn   r   ro   r   )Fr   rq   r(   loggingr   r!   r   r   dataclassesr   	functoolsr   	itertoolsr   typingr   r   r   r	   r
   r   r   torch.distributeddistributedr   r*   PILr   torch.utils.datar   r   r   
webdatasetr%   webdataset.filtersr   r   webdataset.shardlistsr   webdataset.tariteratorsr   r   r   r   ImportErrorr   r   readerr   shared_countr   	getLoggerrB   r-   rE   environrM   r   r   r9   r;   r    rg   rl   r   r   PipelineStager   r   r   r>   rG   r8   <module>r     sR   
    	  
 !   = =      F F51bb
 &  %
'

H
%"**..);TBC "**..);TBC 0   0S 0 0f !H+ ?Bc'' B> K)- )-XL LU  
CKs    E   	EE