
    kh                         d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlmZ ddlmZmZ ddlmZ ddlmZ  G d	 d
e      Zy)    N)Path)AnyCallableDictListOptionalTupleUnion)Tensor   )find_classesmake_dataset)
VideoClips)VisionDatasetc            !           e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 ddeeef   dedededee   ded	e	d
ee
   deeeef      dededededededdf  fdZedeeef   fd       Zdee   deded	e	dee   f
dZdefdZdedeeeef   fdZ xZS )UCF101a  
    `UCF101 <https://www.crcv.ucf.edu/data/UCF101.php>`_ dataset.

    UCF101 is an action recognition video dataset.
    This dataset consider every video as a collection of video clips of fixed size, specified
    by ``frames_per_clip``, where the step in frames between each clip is given by
    ``step_between_clips``. The dataset itself can be downloaded from the dataset website;
    annotations that ``annotation_path`` should be pointing to can be downloaded from `here
    <https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip>`_.

    To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
    and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
    elements will come from video 1, and the next three elements from video 2.
    Note that we drop clips which do not have exactly ``frames_per_clip`` elements, so not all
    frames in a video might be present.

    Internally, it uses a VideoClips object to handle clip creation.

    Args:
        root (str or ``pathlib.Path``): Root directory of the UCF101 Dataset.
        annotation_path (str): path to the folder containing the split files;
            see docstring above for download instructions of these files
        frames_per_clip (int): number of frames in a clip.
        step_between_clips (int, optional): number of frames between each clip.
        fold (int, optional): which fold to use. Should be between 1 and 3.
        train (bool, optional): if ``True``, creates a dataset from the train split,
            otherwise from the ``test`` split.
        transform (callable, optional): A function/transform that takes in a TxHxWxC video
            and returns a transformed version.
        output_format (str, optional): The format of the output video tensors (before transforms).
            Can be either "THWC" (default) or "TCHW".

    Returns:
        tuple: A 3-tuple with the following entries:

            - video (Tensor[T, H, W, C] or Tensor[T, C, H, W]): The `T` video frames
            -  audio(Tensor[K, L]): the audio frames, where `K` is the number of channels
               and `L` is the number of points
            - label (int): class of the video clip
    Nrootannotation_pathframes_per_clipstep_between_clips
frame_ratefoldtrain	transform_precomputed_metadatanum_workers_video_width_video_height_video_min_dimension_audio_samplesoutput_formatreturnc                    t         |   |       d|cxk  rdk  sn t        d|       d}|| _        || _        t        | j                        \  | _        }t        | j                  ||d       | _	        | j                  D cg c]  }|d   	 }}t        |||||	|
|||||      }|| _        | j                  ||||      | _        |j                  | j                        | _        || _        y c c}w )Nr      z$fold should be between 1 and 3, got )avi)is_valid_filer   )r   r   r   r   r    r!   )super__init__
ValueErrorr   r   r   r   classesr   samplesr   full_video_clips_select_foldindicessubsetvideo_clipsr   )selfr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   
extensionsclass_to_idxx
video_listr0   	__class__s                        W/var/www/teggl/fontify/venv/lib/python3.12/site-packages/torchvision/datasets/ucf101.pyr(   zUCF101.__init__6   s    $ 	D~A~CD6JKK
	
%1$))%<"l#DII|ZW[\$(LL1qad1
1 !#%'!5)'
  !,((_dER&--dll;") 2s   C1c                 .    | j                   j                  S N)r,   metadatar1   s    r7   r:   zUCF101.metadatah   s    $$---    r5   c           
      T   |rdnd}| d|dd}t         j                  j                  ||      }t               }t	        |      5 }|j                         }	|	D 
cg c]$  }
|
j                         j                  d      d   & }	}
|	D 
cg c];  }
t        j                  j                  | j                  g|
j                  d       = }	}
|j                  |	       d d d        t        t        |            D cg c]  }||   |v s| }}|S c c}
w c c}
w # 1 sw Y   >xY wc c}w )	Nr   testlist02dz.txt r   /)ospathjoinsetopen	readlinesstripsplitr   updaterangelen)r1   r5   r   r   r   namefselected_filesfiddatar4   ir.   s                r7   r-   zUCF101._select_foldl   s   wVtD:T*GGLL$/!W 	(==?D59:AGGIOOC(+:D:DHIqBGGLL:QWWS\:IDI!!$'		(
 $C
O4X
18W1XX	 ;I	( 	(
 Ys7   D)DD	A D	D:D%D%
DD"c                 6    | j                   j                         S r9   )r0   	num_clipsr;   s    r7   __len__zUCF101.__len__y   s    ))++r<   idxc                     | j                   j                  |      \  }}}}| j                  | j                  |      d   }| j                  | j	                  |      }|||fS )Nr   )r0   get_clipr+   r.   r   )r1   rW   videoaudioinfo	video_idxlabels          r7   __getitem__zUCF101.__getitem__|   sb    (,(8(8(A(A#(F%udIT\\)45a8>>%NN5)EeU""r<   )r   Nr   TNNr   r   r   r   r   THWC)__name__
__module____qualname____doc__r
   strr   intr   boolr   r   r   r(   propertyr:   r   r-   rV   r	   r   r_   __classcell__)r6   s   @r7   r   r      st   '\ #$$((,:>$%#!0#CI0# 0# 	0#
  0# SM0# 0# 0# H%0#  (S#X70# 0# 0# 0# "0# 0#  !0#" 
#0#d .$sCx. . .tCy 3 c Z^ cghkcl , ,#s #uVVS-@'A #r<   r   )rC   pathlibr   typingr   r   r   r   r   r	   r
   torchr   folderr   r   video_utilsr   visionr   r    r<   r7   <module>rq      s.    	  D D D  . # !w#] w#r<   