
    khw                     v    d dl Z d dlmZ d dlmZmZmZmZmZ ddl	m
Z
 ddlmZmZmZ ddlmZ  G d d	e      Zy)
    N)Path)AnyCallableOptionalTupleUnion   )default_loader)check_integritydownload_and_extract_archivedownload_url)VisionDatasetc                        e Zd ZdZdZdZdZdddefdee	e
f   dee   d	ee   d
edee	gef   ddf fdZdedeeef   fdZdefdZdefdZddZ xZS )SBUa  `SBU Captioned Photo <http://www.cs.virginia.edu/~vicente/sbucaptions/>`_ Dataset.

    Args:
        root (str or ``pathlib.Path``): Root directory of dataset where tarball
            ``SBUCaptionedPhotoDataset.tar.gz`` exists.
        transform (callable, optional): A function/transform that takes in a PIL image or torch.Tensor, depends on the given loader,
            and returns a transformed version. E.g, ``transforms.RandomCrop``
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
        loader (callable, optional): A function to load an image given its path.
            By default, it uses PIL as its image loader, but users could also pass in
            ``torchvision.io.decode_image`` for decoding image data into tensors directly.
    zHhttps://www.cs.rice.edu/~vo9/sbucaptions/SBUCaptionedPhotoDataset.tar.gzzSBUCaptionedPhotoDataset.tar.gz 9aec147b3488753cf758b4d493422285NTroot	transformtarget_transformdownloadloaderreturnc                 &   t         |   |||       || _        |r| j                          | j	                         st        d      g | _        g | _        t        j                  j                  | j                  dd      }t        j                  j                  | j                  dd      }t        t        |      t        |            D ]  \  }}	|j                         }
t        j                  j                  |
      }t        j                  j                  | j                  d|      }t        j                  j!                  |      s|	j                         }| j                  j#                  |       | j                  j#                  |        y )N)r   r   zHDataset not found or corrupted. You can use download=True to download itdataset$SBU_captioned_photo_dataset_urls.txtz(SBU_captioned_photo_dataset_captions.txt)super__init__r   r   _check_integrityRuntimeErrorphotoscaptionsospathjoinr   zipopenrstripbasenameexistsappend)selfr   r   r   r   r   file1file2line1line2urlphotofilenamecaption	__class__s                 T/var/www/teggl/fontify/venv/lib/python3.12/site-packages/torchvision/datasets/sbu.pyr   zSBU.__init__!   s%    	EUVMMO$$&ijj TYY	3YZTYY	3]^UT%[9 	.LE5,,.CGG$$S)Eww||DIIy%@Hww~~h',,.""5)$$W-	.    indexc                 .   t         j                  j                  | j                  d| j                  |         }| j                  |      }| j                  | j                  |      }| j                  |   }| j                  | j                  |      }||fS )z
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is a caption for the photo.
        r   )	r!   r"   r#   r   r   r   r   r    r   )r*   r6   r1   imgtargets        r4   __getitem__zSBU.__getitem__B   s     77<<		9dkk%6HIkk(#>>%..%Cu%  ,**62FF{r5   c                 ,    t        | j                        S )z$The number of photos in the dataset.)lenr   )r*   s    r4   __len__zSBU.__len__U   s    4;;r5   c                     | j                   }t        j                  j                  || j                        }t        || j                        syy)z1Check the md5 checksum of the downloaded tarball.FT)r   r!   r"   r#   r1   r   md5_checksum)r*   r   fpaths      r4   r   zSBU._check_integrityY   s9    yyT4==1ud&7&78r5   c           	         | j                         ryt        | j                  | j                  | j                  | j                  | j
                         t        t        j                  j                  | j                  dd            5 }|D ]G  }|j                         }	 t        |t        j                  j                  | j                  d             I 	 ddd       y# t        $ r Y _w xY w# 1 sw Y   yxY w)zEDownload and extract the tarball, and download each individual photo.Nr   r   )r   r   r/   r   r1   r?   r%   r!   r"   r#   r&   r   OSError)r*   fhliner/   s       r4   r   zSBU.downloada   s       "$TXXtyy$))T]]TXTeTef "'',,tyy)5[\] 	ac kkm bggll499i&HI	 	
   	 	s0   C-4CC-	C*'C-)C**C--C6)r   N)__name__
__module____qualname____doc__r/   r1   r?   r
   r   strr   r   r   boolr   r   intr   r:   r=   r   r   __classcell__)r3   s   @r4   r   r      s    " UC0H5L
 )-/3'5.CI. H%. #8,	.
 . #$. 
.B sCx &   $ r5   r   )r!   pathlibr   typingr   r   r   r   r   folderr
   utilsr   r   r   visionr   r    r5   r4   <module>rS      s+    	  8 8 " N N !g- gr5   