
    kh&                         d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZmZmZmZ d dlmZ dd	lmZmZ dd
lmZmZmZmZ ddlmZ ddlmZ deee	f   deee	f   deddfdZ  G d de      Z!y)    N)partial)Pool)path)Path)AnyCallableDictOptionalTupleUnion)Tensor   )find_classesmake_dataset)check_integritydownload_and_extract_archivedownload_urlverify_str_arg)
VideoClips)VisionDatasettarpath	videopathlinereturnc                     t        || |       y N)r   )r   r   r   s      Y/var/www/teggl/fontify/venv/lib/python3.12/site-packages/torchvision/datasets/kinetics.py_dl_wrapr      s     w	:    c            )       @    e Zd ZdZddddZddddZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d'd
eeef   de	dedede
e	   de	de
e   deedf   dede	de	de
eeef      de	de	de	de	de	dededd	f( fdZd(d Zd(d!Zd(d"Zedeeef   fd#       Zde	fd$Zd%e	deeee	f   fd&Z xZS ))Kineticsu  `Generic Kinetics <https://www.deepmind.com/open-source/kinetics>`_
    dataset.

    Kinetics-400/600/700 are action recognition video datasets.
    This dataset consider every video as a collection of video clips of fixed size, specified
    by ``frames_per_clip``, where the step in frames between each clip is given by
    ``step_between_clips``.

    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
    elements will come from video 1, and the next three elements from video 2.
    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
    frames in a video might be present.

    Args:
        root (str or ``pathlib.Path``): Root directory of the Kinetics Dataset.
            Directory should be structured as follows:
            .. code::

                root/
                ├── split
                │   ├──  class1
                │   │   ├──  vid1.mp4
                │   │   ├──  vid2.mp4
                │   │   ├──  vid3.mp4
                │   │   ├──  ...
                │   ├──  class2
                │   │   ├──   vidx.mp4
                │   │    └── ...

            Note: split is appended automatically using the split argument.
        frames_per_clip (int): number of frames in a clip
        num_classes (int): select between Kinetics-400 (default), Kinetics-600, and Kinetics-700
        split (str): split of the dataset to consider; supports ``"train"`` (default) ``"val"`` ``"test"``
        frame_rate (float): If omitted, interpolate different frame rate for each clip.
        step_between_clips (int): number of frames between each clip
        transform (callable, optional): A function/transform that takes in a TxHxWxC video
            and returns a transformed version.
        download (bool): Download the official version of the dataset to root folder.
        num_workers (int): Use multiple workers for VideoClips creation
        num_download_workers (int): Use multiprocessing in order to speed up download.
        output_format (str, optional): The format of the output video tensors (before transforms).
            Can be either "THWC" or "TCHW" (default).
            Note that in most other utils and datasets, the default is actually "THWC".

    Returns:
        tuple: A 3-tuple with the following entries:

            - video (Tensor[T, C, H, W] or Tensor[T, H, W, C]): the `T` video frames in torch.uint8 tensor
            - audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
              and `L` is the number of points in torch.float tensor
            - label (int): class of the video clip

    Raises:
        RuntimeError: If ``download is True`` and the video archives are already extracted.
    zChttps://s3.amazonaws.com/kinetics/400/{split}/k400_{split}_path.txtzChttps://s3.amazonaws.com/kinetics/600/{split}/k600_{split}_path.txtzMhttps://s3.amazonaws.com/kinetics/700_2020/{split}/k700_2020_{split}_path.txt400600700z=https://s3.amazonaws.com/kinetics/400/annotations/{split}.csvz=https://s3.amazonaws.com/kinetics/600/annotations/{split}.csvzBhttps://s3.amazonaws.com/kinetics/700_2020/annotations/{split}.csvNrootframes_per_clipnum_classessplit
frame_ratestep_between_clips	transform
extensions.downloadnum_download_workersnum_workers_precomputed_metadata_video_width_video_height_video_min_dimension_audio_samples_audio_channels_legacyoutput_formatr   c                 Z   t        |dg d      | _        || _        |
| _        || _        || _        |r|| _        d| _        d}|	r;t        d      t        j                  ||      | _        t        |dg d      | _        |	r| j                          t        | 5  | j                         t        | j                        \  | _        }t!        | j                  ||d 	      | _        | j"                  D cg c]  }|d
   	 }}t%        ||||||||||||      | _        || _        y c c}w )Nr(   r"   )argvalid_valuesunknownTHWCz2Cannot download the videos using legacy_structure.r)   )trainvaltest)is_valid_filer   )r0   r2   r3   r4   r5   r6   r8   )r   r(   r-   r/   r&   r7   split_folderr)   
ValueErrorr   joindownload_and_process_videossuper__init__r   classesr   samplesr   video_clipsr,   )selfr&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   class_to_idxx
video_list	__class__s                          r   rG   zKinetics.__init__[   s(   0 *+=Wlm$$8!	 $D"DJ"M !UVV $		$ 6D'7IabDJ,,.#%1$2C2C%D"l#D$5$5|Z_cd$(LL1qad1
1%!#%'!5)+'
 # 2s   6D(c                 D    | j                          | j                          y)zEDownloads all the videos to the _root_ folder in the expected format.N)_download_videos_make_ds_structurerK   s    r   rE   z$Kinetics.download_and_process_videos   s    !r   c                 h   t        j                  | j                        ryt        j                  | j                  d      }t        j                  | j                  d      }| j
                  | j                     j                  | j                        }t        j                  |t        j                  |            }t        |      st        ||       t        |      5 }|j                         j                         D cg c]#  }t        j                   j#                  |d      % }}ddd       | j$                  dk(  rD ]  }t'        ||| j                          yt)        t*        || j                        }t-        | j$                        }	|	j/                  |       yc c}w # 1 sw Y   xY w)a  download tarballs containing the video to "tars" folder and extract them into the _split_ folder where
        split is one of the official dataset splits.

        Raises:
            RuntimeError: if download folder exists, break to prevent downloading entire dataset again.
        Ntarsfilesr)   z/,:)safer   )r   existsrB   rD   r&   	_TAR_URLSr(   formatr)   basenamer   r   openread
splitlinesurllibparsequoter/   r   r   r   r   map)
rK   tar_pathfile_list_path	split_urlsplit_url_filepathfiler   list_video_urlspartpoolprocs
             r   rQ   zKinetics._download_videos   s^    ;;t(()99TYY/499g6NN4#3#34;;$**;M	!YY~t}}Y7OP12N3$% 	jPTPYPYP[PfPfPhiv||11$U1CiOi	j $$)' P,T8T=N=NOP 8Xt/@/@ADD556HLL/ j	j 	js   !F( (F#(F(#F((F1c           
         t        j                  | j                  d      }t        t        j                  || j                   d            s=t        | j                  | j                     j                  | j                        |       t        j                  || j                   d      }d}t        |      5 }t        j                  |      }|D ]  }|j                  |d   t        |d         t        |d               }|d	   j                  d
d      j                  dd      j                  dd      j                  dd      }t        j                  t        j                  | j                   |      d       t        j                  | j                   |      }	t        j"                  |	      st        j                  |	t        j                  | j                   ||              	 ddd       y# 1 sw Y   yxY w)u   move videos from
        split_folder/
            ├── clip1.avi
            ├── clip2.avi

        to the correct format as described below:
        split_folder/
            ├── class1
            │   ├── clip1.avi

        annotationsz.csvrW   z{ytid}_{start:06}_{end:06}.mp4
youtube_id
time_starttime_end)ytidstartendlabel _' ()T)exist_okN)r   rD   r&   r   r)   r   _ANNOTATION_URLSr(   r[   r]   csv
DictReaderintreplaceosmakedirsrB   isfile)
rK   annotation_pathrm   file_fmtstrcsvfilereaderrowfrt   downloaded_files
             r   rR   zKinetics._make_ds_structure   s    ))DII}=tyyTZZL:MNO..t/?/?@GGdjjGY[jkiiDJJ<t1DE6+ 	'^^G,F &&\*c,/0C
O, ' 
 G,,S#6>>sBGOOPSUWX``adfhiDIId&7&7?$O"&))D,=,=q"A;;/JJ'		$"3"3UA>	 	 	s   :C:G758G77H c                 .    | j                   j                  S r   )rJ   metadatarS   s    r   r   zKinetics.metadata   s    (((r   c                 6    | j                   j                         S r   )rJ   	num_clipsrS   s    r   __len__zKinetics.__len__   s    ))++r   idxc                     | j                   j                  |      \  }}}}| j                  |   d   }| j                  | j                  |      }|||fS )Nr   )rJ   get_cliprI   r,   )rK   r   videoaudioinfo	video_idxrt   s          r   __getitem__zKinetics.__getitem__   sY    (,(8(8(A(A#(F%udIY'*>>%NN5)EeU""r   )r#   r>   Nr   N)avimp4Fr   r   Nr   r   r   r   r   FTCHW)r   N)__name__
__module____qualname____doc__rZ   r|   r   strr   r   r
   r   r   boolr	   r   rG   rE   rQ   rR   propertyr   r   r   r   __classcell__)rO   s   @r   r!   r!      s   7t UT^I ONS !$("#(,&4$%:>$% #)?#CI?# ?# 	?#
 ?# SM?#  ?# H%?# #s(O?# ?# "?# ?#  (S#X7?# ?# ?#  "!?#" #?#$ %?#& '?#( )?#* 
+?#B"
06!F )$sCx. ) ), ,#s #uVVS-@'A #r   r!   )"r}   r   r`   	functoolsr   multiprocessingr   r   pathlibr   typingr   r   r	   r
   r   r   torchr   folderr   r   utilsr   r   r   r   video_utilsr   visionr   r   r   r!    r   r   <module>r      st    
 	       > >  . ^ ^ # !;eCI& ;5d3C ;3 ;SW ;W#} W#r   