pytorch · bjuncek · Oct 21, 2021 · Oct 21, 2021 · Oct 31, 2021 · Nov 2, 2021
diff --git a/main.py b/main.py
@@ -0,0 +1,32 @@
+from torchvision.prototype import datasets
+from torchvision.prototype.datasets.video_utils import AVKeyframeReader, AVRandomFrameReader, AVClipReader
+
+
+
+# Smoke test for the prototype video readers: every section below wraps the UCF101 dataset in one
+# reader and prints the first six samples that reader produces.
+print("\n \n KEYFRAMES \n \n")
+keyframe_reader = AVKeyframeReader(datasets.load("ucf101"))
+for index, sample in enumerate(keyframe_reader):
+    print(sample)
+    # Break right after the sixth sample so that a seventh one is never requested.
+    if index >= 5:
+        break
+
+
+print("\n \n RANDOM FRAMES")
+random_frame_reader = AVRandomFrameReader(datasets.load("ucf101"), num_samples=3)
+for index, sample in enumerate(random_frame_reader):
+    print(sample)
+    # Same six-sample limit as in the keyframe section.
+    if index >= 5:
+        break
+
+print("\n \n CLIPS ")
+clip_reader = AVClipReader(datasets.load("ucf101"), num_frames_per_clip=16, num_clips_per_video=8)
+for index, sample in enumerate(clip_reader):
+    # Only the "path" and "range" entries of each clip sample are printed.
+    print(sample['path'], sample["range"])
+    # Same six-sample limit as in the sections above.
+    if index >= 5:
+        break
diff --git a/torchvision/prototype/datasets/_builtin/__init__.py b/torchvision/prototype/datasets/_builtin/__init__.py
@@ -1,6 +1,7 @@
 from .caltech import Caltech101, Caltech256
 from .celeba import CelebA
 from .cifar import Cifar10, Cifar100
+from .ucf101 import ucf101
 from .coco import Coco
 from .imagenet import ImageNet
 from .mnist import MNIST, FashionMNIST, KMNIST, EMNIST, QMNIST

diff --git a/torchvision/prototype/datasets/_builtin/ucf101.py b/torchvision/prototype/datasets/_builtin/ucf101.py
@@ -0,0 +1,94 @@
+import io
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+from torchvision.prototype.datasets.utils._internal import RarArchiveReader, INFINITE_BUFFER_SIZE
+
+import numpy as np
+import torch
+from torchdata.datapipes.iter import CSVParser, KeyZipper
+from torch.utils.data import IterDataPipe
+from torch.utils.data.datapipes.iter import (
+    Filter,
+    Mapper,
+    ZipArchiveReader,
+    Shuffler,
+)
+from torchvision.prototype.datasets.decoder import raw
+from torchvision.prototype.datasets.utils import (
+    Dataset,
+    DatasetConfig,
+    DatasetInfo,
+    HttpResource,
+    OnlineResource,
+    DatasetType,
+)
+
+
+class ucf101(Dataset):
+    """UCF101 video dataset that yields every video as an undecoded file handle.
+
+    Each sample is a dict with the keys ``"path"``, ``"file"`` and ``"target"``. Decoding is meant to be
+    added later, either as decoder options or as additional datapipe extensions.
+    """
+
+    @property
+    def info(self) -> DatasetInfo:
+        return DatasetInfo(
+            "ucf101",
+            type=DatasetType.VIDEO,
+            valid_options={"split": ["train", "test"], "fold": ["1", "2", "3"]},
+            # categories=HERE / "ucf101.categories",
+            homepage="https://www.crcv.ucf.edu/data/UCF101.php",
+        )
+
+    def resources(self, config: DatasetConfig) -> List[OnlineResource]:
+        # Order matters: `_make_datapipe` reads the split lists from index 0 and the videos from index 1.
+        # TODO: fill in the sha256 checksums, which are still empty.
+        return [
+            HttpResource(
+                "https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip",
+                sha256="",
+            ),
+            HttpResource("https://www.crcv.ucf.edu/data/UCF101/UCF101.rar", sha256=""),
+        ]
+
+    def _collate_and_decode(
+        self,
+        data: Tuple[List[str], Tuple[str, io.IOBase]],
+        *,
+        decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]],
+    ) -> Dict[str, Any]:
+        """Merge one split-list row and its matching ``(path, file)`` pair into a sample dict."""
+        row, (video_path, file_handle) = data
+        # UCF101 test lists hold only the video path (no class-index column); use ``None`` then.
+        label = row[1] if len(row) > 1 else None
+        # NOTE: ``decoder`` is accepted but not applied yet; the raw file handle is returned as is.
+        return {"path": video_path, "file": file_handle, "target": label}
+
+    def _filtername(self, data, *, tgt):
+        """Return ``True`` if the file-name component of ``data[0]`` equals ``tgt``."""
+        return Path(data[0]).name == tgt
+
+    def _getname(self, data):
+        """Return the file-name component of ``data[0]``; passed to ``KeyZipper`` as key function."""
+        return Path(data[0]).name
+
+    def _make_datapipe(
+        self,
+        resource_dps: List[IterDataPipe],
+        *,
+        config: DatasetConfig,
+        decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]],
+    ) -> IterDataPipe[Dict[str, Any]]:
+        """Select the split list for ``config``, shuffle its rows and zip them with the videos by file name."""
+        annotations_dp = ZipArchiveReader(resource_dps[0])
+        split_file = f"{config.split}list0{config.fold}.txt"
+        annotations_dp = Filter(annotations_dp, self._filtername, fn_kwargs=dict(tgt=split_file))
+        annotations_dp = CSVParser(annotations_dp, delimiter=" ")
+        # Comment out the shuffling below to get a stable sample order while testing.
+        annotations_dp = Shuffler(annotations_dp, buffer_size=INFINITE_BUFFER_SIZE)
+
+        files_dp = RarArchiveReader(resource_dps[1])
+        dp = KeyZipper(annotations_dp, files_dp, self._getname, self._getname)
+        return Mapper(dp, self._collate_and_decode, fn_kwargs=dict(decoder=decoder))
diff --git a/torchvision/prototype/datasets/decoder.py b/torchvision/prototype/datasets/decoder.py
@@ -1,5 +1,8 @@
 import io
+import unittest.mock
+from typing import Dict, Any
 
+import av
 import PIL.Image
 import torch
 from torchvision.transforms.functional import pil_to_tensor
@@ -13,3 +16,17 @@ def raw(buffer: io.IOBase) -> torch.Tensor:
 
 def pil(buffer: io.IOBase, mode: str = "RGB") -> torch.Tensor:
     return pil_to_tensor(PIL.Image.open(buffer).convert(mode.upper()))
+
+
+def av_kf(buffer: io.IOBase, **read_video_kwargs: Any) -> Dict[str, Any]:
+    """Decode the keyframes of the first video stream in ``buffer``; return their images and raw ``pts``."""
+    # NOTE: ``read_video_kwargs`` is accepted but currently unused.
+    keyframes, pts = [], []
+    with av.open(buffer) as container:
+        stream = container.streams.video[0]
+        # Ask the codec to skip every frame that is not a keyframe.
+        stream.codec_context.skip_frame = "NONKEY"
+        for frame in container.decode(stream):
+            keyframes.append(frame.to_image())
+            pts.append(frame.pts)  # TODO: convert to seconds
+    return {"keyframes": keyframes, "pts": pts}
diff --git a/torchvision/prototype/datasets/utils/_dataset.py b/torchvision/prototype/datasets/utils/_dataset.py
@@ -28,6 +28,7 @@
 class DatasetType(enum.Enum):
     RAW = enum.auto()
     IMAGE = enum.auto()
+    VIDEO = enum.auto()
 
 
 class DatasetConfig(FrozenBunch):
@@ -148,7 +149,7 @@ def _make_datapipe(
         resource_dps: List[IterDataPipe],
         *,
         config: DatasetConfig,
-        decoder: Optional[Callable[[io.IOBase], torch.Tensor]],
+        decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]],
     ) -> IterDataPipe[Dict[str, Any]]:
         pass
 
@@ -157,7 +158,7 @@ def to_datapipe(
         root: Union[str, pathlib.Path],
         *,
         config: Optional[DatasetConfig] = None,
-        decoder: Optional[Callable[[io.IOBase], torch.Tensor]] = None,
+        decoder: Optional[Callable[[io.IOBase], Dict[str, Any]]] = None,
     ) -> IterDataPipe[Dict[str, Any]]:
         if not config:
             config = self.info.default_config

diff --git a/torchvision/prototype/datasets/utils/_internal.py b/torchvision/prototype/datasets/utils/_internal.py
@@ -48,6 +48,7 @@
     "path_accessor",
     "path_comparator",
     "Decompressor",
+    "RarArchiveReader",
 ]
 
 K = TypeVar("K")
@@ -277,3 +278,37 @@ def __iter__(self) -> Iterator[Tuple[str, io.IOBase]]:
             type = self._detect_compression_type(path)
             decompressor = self._DECOMPRESSORS[type]
             yield path, decompressor(file)
+
+
+class RarArchiveReader(IterDataPipe[Tuple[str, io.BufferedIOBase]]):
+    """Datapipe that expands every incoming ``(path, stream)`` RAR archive into its member files."""
+
+    def __init__(self, datapipe: IterDataPipe[Tuple[str, io.BufferedIOBase]]):
+        # Resolve `rarfile` first so that a missing package already fails at construction time.
+        self._rarfile = self._verify_dependencies()
+        super().__init__()
+        self.datapipe = datapipe
+
+    @staticmethod
+    def _verify_dependencies():
+        """Import and return the ``rarfile`` module; raise ``ModuleNotFoundError`` if it is not installed."""
+        try:
+            import rarfile
+        except ImportError as error:
+            raise ModuleNotFoundError(
+                "Package `rarfile` is required to be installed to use this datapipe. "
+                "Please use `pip install rarfile` or `conda -c conda-forge install rarfile` to install it."
+            ) from error
+        else:
+            # check if at least one system library for reading rar archives is available to be used by rarfile
+            rarfile.tool_setup()
+            return rarfile
+
+    def __iter__(self) -> Iterator[Tuple[str, io.BufferedIOBase]]:
+        for archive_path, archive_stream in self.datapipe:
+            archive = self._rarfile.RarFile(archive_stream)
+            # Entries whose name ends with "/" are skipped; every other member is opened and yielded
+            # together with its name joined onto the path of the archive it came from.
+            members = (member for member in archive.infolist() if not member.filename.endswith("/"))
+            for member in members:
+                yield os.path.join(archive_path, member.filename), archive.open(member)