Skip to content

Commit 97085f2

Browse files
NicolasHug and facebook-github-bot
authored and committed
[fbsync] Add FallingThings dataset (#6346)
Summary: * Added Falling Things datasets * Renamed split to variant * Update torchvision/datasets/_stereo_matching.py Changed constant formatting Reviewed By: datumbox Differential Revision: D38824218 fbshipit-source-id: adeeeec057bd0afd1fb286e11526b871e62d376f Co-authored-by: Nicolas Hug <[email protected]> Co-authored-by: Nicolas Hug <[email protected]>
1 parent 13f7a71 commit 97085f2

File tree

4 files changed

+173
-1
lines changed

4 files changed

+173
-1
lines changed

docs/source/datasets.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ Stereo Matching
111111
CarlaStereo
112112
Kitti2012Stereo
113113
Kitti2015Stereo
114+
FallingThingsStereo
114115
SceneFlowStereo
115116
SintelStereo
116117
InStereo2k

test/test_datasets.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2841,6 +2841,69 @@ def test_train_splits(self):
28412841
datasets_utils.shape_test_for_stereo(left, right, disparity)
28422842

28432843

2844+
class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase):
    """Test case for ``datasets.FallingThingsStereo`` backed by a fake dataset written to disk."""

    DATASET_CLASS = datasets.FallingThingsStereo
    ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(variant=("single", "mixed", "both"))
    FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)))

    @staticmethod
    def _make_dummy_depth_map(root: str, name: str, size: Tuple[int, int]):
        """Save an all-ones ``uint8`` array of shape ``(size[0], size[1])`` as the image ``root / name``."""
        depth_pixels = np.ones((size[0], size[1]), dtype=np.uint8)
        target_path = pathlib.Path(root) / name
        PIL.Image.fromarray(depth_pixels).save(target_path)

    @staticmethod
    def _make_scene_folder(root: str, scene_name: str, size: Tuple[int, int]) -> None:
        """Create ``root / scene_name`` holding one stereo pair, its depth maps and the camera settings."""
        scene_dir = pathlib.Path(root) / scene_name
        os.makedirs(scene_dir, exist_ok=True)

        # jpg images
        for jpg_name in ("image1.left.jpg", "image1.right.jpg"):
            datasets_utils.create_image_file(scene_dir, jpg_name, size=(3, size[1], size[0]))

        # single channel depth maps
        for depth_name in ("image1.left.depth.png", "image1.right.depth.png"):
            FallingThingsStereoTestCase._make_dummy_depth_map(scene_dir, depth_name, size=(size[0], size[1]))

        # camera settings json. Minimal example for _read_disparity function testing
        camera_settings = {"camera_settings": [{"intrinsic_settings": {"fx": 1}}]}
        with open(scene_dir / "_camera_settings.json", "w") as settings_file:
            json.dump(camera_settings, settings_file)

    def inject_fake_data(self, tmpdir, config):
        """Write the fake ``FallingThings`` tree for ``config["variant"]`` and return the example count."""
        dataset_root = pathlib.Path(tmpdir) / "FallingThings"
        os.makedirs(dataset_root, exist_ok=True)

        requested_variant = config["variant"]
        scenes_per_variant = {"single": 2, "mixed": 3, "both": 4}.get(requested_variant, 0)
        variant_names = {
            "single": ["single"],
            "mixed": ["mixed"],
            "both": ["single", "mixed"],
        }.get(requested_variant, [])

        for variant_name in variant_names:
            variant_dir = pathlib.Path(dataset_root) / variant_name
            os.makedirs(variant_dir, exist_ok=True)
            for scene_index in range(scenes_per_variant):
                self._make_scene_folder(
                    root=variant_dir,
                    scene_name=f"scene_{scene_index:06d}",
                    size=(100, 200),
                )

        # "both" writes ``scenes_per_variant`` scenes under each of the two variant folders.
        if requested_variant == "both":
            return scenes_per_variant * 2
        return scenes_per_variant

    def test_splits(self):
        """Run the stereo shape check on every sample of the "single" and "mixed" variants."""
        for variant_name in ("single", "mixed"):
            with self.create_dataset(variant=variant_name) as (dataset, _):
                for left, right, disparity in dataset:
                    datasets_utils.shape_test_for_stereo(left, right, disparity)

    def test_bad_input(self):
        """An unsupported ``variant`` value must raise ``ValueError``."""
        with pytest.raises(ValueError, match="Unknown value 'bad' for argument variant"):
            with self.create_dataset(variant="bad"):
                pass
2905+
2906+
28442907
class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase):
28452908
DATASET_CLASS = datasets.SceneFlowStereo
28462909
ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(

torchvision/datasets/__init__.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
from ._optical_flow import FlyingChairs, FlyingThings3D, HD1K, KittiFlow, Sintel
2-
from ._stereo_matching import CarlaStereo, InStereo2k, Kitti2012Stereo, Kitti2015Stereo, SceneFlowStereo, SintelStereo
2+
from ._stereo_matching import (
3+
CarlaStereo,
4+
FallingThingsStereo,
5+
InStereo2k,
6+
Kitti2012Stereo,
7+
Kitti2015Stereo,
8+
SceneFlowStereo,
9+
SintelStereo,
10+
)
311
from .caltech import Caltech101, Caltech256
412
from .celeba import CelebA
513
from .cifar import CIFAR10, CIFAR100
@@ -109,6 +117,7 @@
109117
"Kitti2012Stereo",
110118
"Kitti2015Stereo",
111119
"CarlaStereo",
120+
"FallingThingsStereo",
112121
"SceneFlowStereo",
113122
"SintelStereo",
114123
"InStereo2k",

torchvision/datasets/_stereo_matching.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import functools
2+
import json
23
import os
34
from abc import ABC, abstractmethod
45
from glob import glob
@@ -362,6 +363,104 @@ def __getitem__(self, index: int) -> Tuple:
362363
return super().__getitem__(index)
363364

364365

366+
class FallingThingsStereo(StereoMatchingDataset):
    """`FallingThings <https://research.nvidia.com/publication/2018-06_falling-things-synthetic-dataset-3d-object-detection-and-pose-estimation>`_ dataset.

    The dataset is expected to have the following structure: ::

        root
            FallingThings
                single
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right.jpg
                        ...
                    scene2
                    ...
                mixed
                    scene1
                        _object_settings.json
                        _camera_settings.json
                        image1.left.depth.png
                        image1.right.depth.png
                        image1.left.jpg
                        image1.right.jpg
                        image2.left.depth.png
                        image2.right.depth.png
                        image2.left.jpg
                        image2.right.jpg
                        ...
                    scene2
                    ...

    Args:
        root (string): Root directory where FallingThings is located.
        variant (string): Which variant to use. Either "single", "mixed", or "both".
        transforms (callable, optional): A function/transform that takes in a sample and returns a transformed version.
    """

    def __init__(self, root: str, variant: str = "single", transforms: Optional[Callable] = None):
        super().__init__(root, transforms)

        root = Path(root) / "FallingThings"

        verify_str_arg(variant, "variant", valid_values=("single", "mixed", "both"))

        # "both" expands to the two on-disk variant folders.
        variants = {
            "single": ["single"],
            "mixed": ["mixed"],
            "both": ["single", "mixed"],
        }[variant]

        for variant_name in variants:
            # One wildcard level for the scene folder, one for the file name.
            left_img_pattern = str(root / variant_name / "*" / "*.left.jpg")
            right_img_pattern = str(root / variant_name / "*" / "*.right.jpg")
            self._images += self._scan_pairs(left_img_pattern, right_img_pattern)

            left_disparity_pattern = str(root / variant_name / "*" / "*.left.depth.png")
            right_disparity_pattern = str(root / variant_name / "*" / "*.right.depth.png")
            self._disparities += self._scan_pairs(left_disparity_pattern, right_disparity_pattern)

    def _read_disparity(self, file_path: str) -> Tuple:
        """Read the depth map at ``file_path`` and convert it to a disparity map.

        The focal length is taken from the ``_camera_settings.json`` file that sits in the
        same directory as the depth map.

        Args:
            file_path (str): Path to a depth map image.

        Returns:
            tuple: ``(disparity_map, valid_mask)``. ``disparity_map`` is a numpy array of shape (1, H, W)
            and ``valid_mask`` is always ``None``.
        """
        # (H, W) image
        with Image.open(file_path) as depth_image:
            depth = np.asarray(depth_image)

        # as per https://research.nvidia.com/sites/default/files/pubs/2018-06_Falling-Things/readme_0.txt
        # in order to extract disparity from depth maps
        camera_settings_path = Path(file_path).parent / "_camera_settings.json"
        # Only the JSON parsing needs the file handle, so it is released before the array math.
        with open(camera_settings_path, "r", encoding="utf-8") as f:
            intrinsics = json.load(f)
        focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]

        # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constant)
        baseline = 6
        pixel_constant = 100  # pixel constant is inverted
        # NOTE(review): pixels whose depth is 0 divide by zero here and come out as inf - confirm
        # that downstream transforms / losses are expected to cope with that.
        disparity_map = (baseline * focal * pixel_constant) / depth.astype(np.float32)
        # unsqueeze disparity to (C, H, W)
        disparity_map = disparity_map[None, :, :]
        valid_mask = None
        return disparity_map, valid_mask

    def __getitem__(self, index: int) -> Tuple:
        """Return example at given index.

        Args:
            index(int): The index of the example to retrieve

        Returns:
            tuple: A 3-tuple with ``(img_left, img_right, disparity)``.
            The disparity is a numpy array of shape (1, H, W) and the images are PIL images.
            If a ``valid_mask`` is generated within the ``transforms`` parameter,
            a 4-tuple with ``(img_left, img_right, disparity, valid_mask)`` is returned.
        """
        return super().__getitem__(index)
462+
463+
365464
class SceneFlowStereo(StereoMatchingDataset):
366465
"""Dataset interface for `Scene Flow <https://lmb.informatik.uni-freiburg.de/resources/datasets/SceneFlowDatasets.en.html>`_ datasets.
367466
This interface provides access to the `FlyingThings3D, `Monkaa` and `Driving` datasets.

0 commit comments

Comments
 (0)