
    kh#                     L   d Z ddlZddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlmZ ddlmZ ddlmZ dd	lmZ  ej.                  e      Zd
Z G d d      Zdej8                  de	de
e   fdZ	 	 	 	 ddee	   dee   deeeee
f      defdZ  G d de      Z!y)aS   A dataset reader that reads tarfile based datasets

This reader can extract image samples from:
* a single tar of image files
* a folder of multiple tarfiles containing imagefiles
* a tar of tars containing image files

Labels are based on the combined folder and/or tar name structure.

Hacked together by / Copyright 2020 Ross Wightman
    N)glob)ListTupleDictSetOptionalUnion)natural_key   )load_class_map)get_img_extensions)Readerz_tarinfos.picklec                   L    e Zd Zddej                  dej
                  fdZd Zy)TarStateNtftic                 .    || _         || _        i | _        y N)r   r   children)selfr   r   s      a/var/www/teggl/fontify/venv/lib/python3.12/site-packages/timm/data/readers/reader_image_in_tar.py__init__zTarState.__init__!   s    #%#%-/    c                     d | _         y r   )r   r   s    r   resetzTarState.reset&   s	    r   )NN)__name__
__module____qualname__tarfileTarFileTarInfor   r    r   r   r   r      s"    07?? 0w 0
r   r   r   parent_info
extensionsc                    d}t        |       D ]^  \  }}|j                         st        j                  j	                  |j                        \  }}t        j                  j                  |      \  }}	|	j                         }	|	dk(  rt        j                  | j                  |      d      5 }
t        |j                  t        j                  j                  |d   |      |g g       }|t        |
||      z  }t        j                  | d|j                   d	t!        |d
          d       |d   j#                  |       d d d        @|	|v sF|d
   j#                  |       |dz  }a |S # 1 sw Y   nxY w)Nr   .tarr|)fileobjmodepathnamer+   r   r   samplesr%   z"/?. Extracted child tarinfos from . r.   z images.r   r   )	enumerateisfileosr+   splitsplitextlowerr    openextractfiledictr-   join_extract_tarinfo_loggerdebuglenappend)r   r$   r%   sample_countir   dirnamebasenamer-   extctf
child_infos               r   r;   r;   *   s\   L2 2yy{GGMM"''2GG$$X.	ciik&=bnnR&8tD ;!rww||K4G'NSU`blnp
 0jZ XX#EbggYbQTU_`iUjQkPlltuvJ'..z:; ; J	"))"-AL  ; ;s   /BE22E<	class_name_to_idxcache_tarinfosortc                   $%&' |st        d      n
t        |      }d}t        j                  j	                  |       rt        j                  j                  |       d   j                         dk(  sJ | g}t        j                  j                  |       \  } }t        j                  j                  |      d   }d}n~| j                  t        j                  j                        j                  t        j                  j                        d   }t        t        j                  j                  | d      d      }t        |      }t        |D 	cg c]!  }	t        j                  j                  |	      # c}	      }
|sJ d	|  d
       t        j!                  d|
dz  dd       t#        g       }d}|	|
dkD  rdnd}|r,d|z   t$        z   }t        j                  j                  | |      }t        j                  j'                  |      r\t        j!                  d| d       t)        |d      5 }t+        j,                  |      }d d d        t        |d         |k(  sEJ d       t/        |      D ]  \  }}|rdn>t        j                  j                  t        j                  j1                  |            d   }t3        j(                  |d      5 }t#        t        j                  j5                  ||       |d g g       }t7        |||      }t        |d         }t        j9                  | d| d| d| d | d!
       d d d        |d   j;                          |rDt        j!                  d"| d       t)        |d#      5 }t+        j<                  ||       d d d        g 'g &d%d%g }dd$d%$$%&'fd&}t        j!                  d'       |d   D ]t  }|rd n|d(   }t?               }d}|d   D ]2  } |||)      }|rt?        |d*   +      |j@                  |d(   <   ||z  }4 | |||)      z  }|sb|j;                  ||f       v ~%rBtC        tE        t        &      tF        ,            }t/        |      D ci c]  \  }}||
 c}}t        j!                  d-       tI        '&      D  !cg c]  \  } }!|!v s| |!   f }"} }!|rtE        |"d. ,      }"tI        |" \  '}#tK        jL                  '      'tK        jL                  |#      }#t        j!                  d/t        '       d0t        |       d1       '|#|fS c c}	w # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY wc c}}w c c}!} w )2NT)as_setFr'   r   z*.tar)	recursivez'No .tar files found at specified path (z).z	Scanning i   z.2fzMB of tar files...)tartrees l       
 _z!Reading tar info from cache file .rbrN   z3Cached tartree len doesn't match number of tarfilesr(   )r*   r,   r/   r   /z. Extracted tarinfos from r0   z children, z	 samples.zWriting tar info to cache file wb)	leaf_onlyc                 8   t        j                  j                  | j                  t         j                  j                        }| r,|j                  t         j                  j                        d   S |j                  t         j                  j                  d      S )NrL   rP   )r3   r+   r:   stripsepr4   replace)rU   r+   s     r   _label_from_pathsz+extract_tarinfos.<locals>._label_from_paths}   s^    ww||T"((5.7tzz"''++&r*[T\\"''++WZ=[[r   c                     d}| d   D ]i  } | d   t         j                  j                  |j                              }s|vr=	j                  ||| d   f       j                  |       |dz  }k |S )Nr   r.   r+   r   r   )r3   r+   rB   r?   )
infofnaddedslabelrZ   build_class_maprG   labelsr.   s
        r   _add_samplesz&extract_tarinfos.<locals>._add_samples   s}    i 	A%d6lBGGOOAFF4KLE"u4E'ENNAr4:./MM% QJE	 r   z+Collecting samples and building tar states.r-   )r]   r   )r   )keyz$Mapping targets and sorting samples.c                 8    t        | d   d   j                        S Nr   )r
   r+   )ks    r   <lambda>z"extract_tarinfos.<locals>.<lambda>   s    TUVWTXYZT[T`T`Ha r   zFinished processing z samples across z tar files.)'r   setr3   r+   r2   r5   r6   r4   rW   rX   r   r:   r>   sumgetsizer<   r\   r9   CACHE_FILENAME_SUFFIXexistsr7   pickleloadr1   rC   r    relpathr;   r=   r?   dumpr   r   listsortedr
   zipnparray)(rootrG   rH   r%   rI   root_is_tartar_filenames	root_namenum_tarsf	tar_bytesr\   
cache_pathcache_filenamepfrA   r]   r+   r   r$   num_samplesnum_childrentarfilesrc   tar_name	tar_stateparent_addedrF   child_addedsorted_labelsidxcr_   lsamples_and_targetstargetsrZ   ra   rb   r.   s(    `                                  @@@@r   extract_tarinfosr   ?   s$    9C#40JJK	ww~~dww%b)//1V;;;''---iGG$$Y/2	JJrww{{+11"''++>rB	RWW\\$8DI=!H?ARWW__Q'?@IG>tfBGG8LL9Yw.s33EFGDJ )J 6Ey+@@WW\\$7
	ww~~j!8AFG*d# 	#r;;r?D	#4
#$0g2gg0}- 	1EAr$2"''*:*:277;K;KB;O*PQR*SDbt, v"D(AQU`blno.r;:V";z#:;c8*$>rd"\NR]^i]jjstv	v ##K0	1 LL::,aHIj$' &2D"%& GFO  H+/ \	 	 LL>@J' 3&4K,?J	%j1 	(J&zh?K9AZPTEU9V	"":f#56K'L		(
 	[X>>OOXy123 	VCK[AB2;M2JKQQVKLL79ADWfAUpAYZ^oYoA034pp$%8>ab/0GWhhwGhhwGLL'G~5Ec(m_T_`aG.88o @	# 	#v v& &b L qs=   &U3UA/U!	U.U;=V
	VU!U+	.U8c                   8     e Zd ZdZd fd	Zd Zd ZddZ xZS )ReaderImageInTarzI Multi-tarfile dataset reader where there is one .tar file per class
    c                    t         	|           d }|rt        ||      }|| _        t	        | j                  ||      \  | _        | _        | _        }| j                  j                         D ci c]  \  }}||
 c}}| _	        t        |      dk(  r$|d   d   d| _        |d   d   | _        || _        y d| _        t        |      | _        || _        y c c}}w )N)rG   rH   r   r   TF)superr   r   rw   r   r.   r   rG   itemsclass_idx_to_namer>   rx   r   r9   cache_tarfiles)
r   rw   	class_mapr   rH   rG   r   rg   v	__class__s
            r   r   zReaderImageInTar.__init__   s      .y$ ?	GWII/'H
DdlD$:H
 483I3I3O3O3Q!R41a!Q$!Rx=A(1+a."8#D%a[^DN -  %D!(^DN, "Ss   0Cc                 ,    t        | j                        S r   )r>   r.   r   s    r   __len__zReaderImageInTar.__len__   s    4<<  r   c                    | j                   |   }| j                  |   }|\  }}}|r*t        j                  j	                  | j
                  |      n| j
                  }d }d }	| j                  r3| j                  r| j                  n| j                  |   }	|	j                  }|(t        j                  |      }| j                  r||	_	        || j                  r#|	j                  |j                     j                  nd }
|
Ot        j                  |j                  |            }
| j                  r|
|	j                  |j                     _	        |
}|j                  |      |fS )N)r)   )r.   r   r3   r+   r:   rw   r   rx   r   r   r    r7   r   r-   r8   )r   indexsampletarget	sample_ti	parent_fnchild_ti
parent_absr   cache_staterE   s              r   __getitem__zReaderImageInTar.__getitem__   s&   e$e$)/&	9h;DRWW\\$))Y7$))
,0,<,<$..$..QZB[KB:j)B""!#<@<O<O+&&x}}588UYC{ll2>>(+CD&&=@K((7:B~~i(&00r   c                     | j                   |   d   j                  }|rt        j                  j	                  |      }|S rf   )r.   r-   r3   r+   rC   )r   r   rC   absolutefilenames        r   	_filenamezReaderImageInTar._filename   s7    <<&q)..ww''1Hr   )rO   TN)FF)	r   r   r   __doc__r   r   r   r   __classcell__)r   s   @r   r   r      s    -*!12r   r   )NNNT)"r   loggingr3   rn   r    r   typingr   r   r   r   r   r	   numpyru   timm.utils.miscr
   r   r   img_extensionsr   readerr   	getLoggerr   r<   rl   r   r!   strr;   boolr   r   r#   r   r   <module>r      s   
  	    : :  ' % . 
'

H
%*   t S . -1(,8<j9#D>j9  ~j9 U4#345	j9
 j9Z9v 9r   