From a9448c33d4fc720d7b385a0d15e504f30c872cb3 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 17 Feb 2023 10:07:15 +0100 Subject: [PATCH 01/10] split datapoints tests --- test/test_datapoints.py | 74 +++++++++++++++++++++++++++++++ test/test_prototype_datapoints.py | 72 ------------------------------ 2 files changed, 74 insertions(+), 72 deletions(-) create mode 100644 test/test_datapoints.py diff --git a/test/test_datapoints.py b/test/test_datapoints.py new file mode 100644 index 00000000000..5a332fe5d4c --- /dev/null +++ b/test/test_datapoints.py @@ -0,0 +1,74 @@ +import re + +import pytest +import torch +from PIL import Image + +from torchvision import datapoints, datasets + + +@pytest.mark.parametrize("data", [torch.rand(3, 32, 32), Image.new("RGB", (32, 32), color=123)]) +def test_image_instance(data): + image = datapoints.Image(data) + assert isinstance(image, torch.Tensor) + assert image.ndim == 3 and image.shape[0] == 3 + + +@pytest.mark.parametrize("data", [torch.randint(0, 10, size=(1, 32, 32)), Image.new("L", (32, 32), color=2)]) +def test_mask_instance(data): + mask = datapoints.Mask(data) + assert isinstance(mask, torch.Tensor) + assert mask.ndim == 3 and mask.shape[0] == 1 + + +@pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]]) +@pytest.mark.parametrize( + "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH] +) +def test_bbox_instance(data, format): + bboxes = datapoints.BoundingBox(data, format=format, spatial_size=(32, 32)) + assert isinstance(bboxes, torch.Tensor) + assert bboxes.ndim == 2 and bboxes.shape[1] == 4 + if isinstance(format, str): + format = datapoints.BoundingBoxFormat.from_str(format.upper()) + assert bboxes.format == format + + +class TestDatasetWrapper: + def test_unknown_type(self): + unknown_object = object() + with pytest.raises( + TypeError, match=re.escape("is meant for subclasses of `torchvision.datasets.VisionDataset`") + ): + datapoints.wrap_dataset_for_transforms_v2(unknown_object) + + def test_unknown_dataset(self): + class MyVisionDataset(datasets.VisionDataset): + pass + + dataset = MyVisionDataset("root") + + with pytest.raises(TypeError, match="No wrapper exist"): + datapoints.wrap_dataset_for_transforms_v2(dataset) + + def test_missing_wrapper(self): + dataset = datasets.FakeData() + + with pytest.raises(TypeError, match="please open an issue"): + datapoints.wrap_dataset_for_transforms_v2(dataset) + + def test_subclass(self, mocker): + sentinel = object() + mocker.patch.dict( + datapoints._dataset_wrapper.WRAPPER_FACTORIES, + clear=False, + values={datasets.FakeData: lambda dataset: lambda idx, sample: sentinel}, + ) + + class MyFakeData(datasets.FakeData): + pass + + dataset = MyFakeData() + wrapped_dataset = datapoints.wrap_dataset_for_transforms_v2(dataset) + + assert wrapped_dataset[0] is sentinel diff --git a/test/test_prototype_datapoints.py b/test/test_prototype_datapoints.py index 615fa9f614d..04e3cd67f96 100644 --- a/test/test_prototype_datapoints.py +++ b/test/test_prototype_datapoints.py @@ -1,11 +1,6 @@ -import re - import pytest import torch -from PIL import Image - -from torchvision import datapoints, datasets from torchvision.prototype import datapoints as proto_datapoints @@ -136,70 +131,3 @@ def test_wrap_like(): assert type(label_new) is proto_datapoints.Label assert label_new.data_ptr() == output.data_ptr() assert label_new.categories is label.categories - - -@pytest.mark.parametrize("data", [torch.rand(3, 32, 32), 
Image.new("RGB", (32, 32), color=123)]) -def test_image_instance(data): - image = datapoints.Image(data) - assert isinstance(image, torch.Tensor) - assert image.ndim == 3 and image.shape[0] == 3 - - -@pytest.mark.parametrize("data", [torch.randint(0, 10, size=(1, 32, 32)), Image.new("L", (32, 32), color=2)]) -def test_mask_instance(data): - mask = datapoints.Mask(data) - assert isinstance(mask, torch.Tensor) - assert mask.ndim == 3 and mask.shape[0] == 1 - - -@pytest.mark.parametrize("data", [torch.randint(0, 32, size=(5, 4)), [[0, 0, 5, 5], [2, 2, 7, 7]]]) -@pytest.mark.parametrize( - "format", ["XYXY", "CXCYWH", datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH] -) -def test_bbox_instance(data, format): - bboxes = datapoints.BoundingBox(data, format=format, spatial_size=(32, 32)) - assert isinstance(bboxes, torch.Tensor) - assert bboxes.ndim == 2 and bboxes.shape[1] == 4 - if isinstance(format, str): - format = datapoints.BoundingBoxFormat.from_str(format.upper()) - assert bboxes.format == format - - -class TestDatasetWrapper: - def test_unknown_type(self): - unknown_object = object() - with pytest.raises( - TypeError, match=re.escape("is meant for subclasses of `torchvision.datasets.VisionDataset`") - ): - datapoints.wrap_dataset_for_transforms_v2(unknown_object) - - def test_unknown_dataset(self): - class MyVisionDataset(datasets.VisionDataset): - pass - - dataset = MyVisionDataset("root") - - with pytest.raises(TypeError, match="No wrapper exist"): - datapoints.wrap_dataset_for_transforms_v2(dataset) - - def test_missing_wrapper(self): - dataset = datasets.FakeData() - - with pytest.raises(TypeError, match="please open an issue"): - datapoints.wrap_dataset_for_transforms_v2(dataset) - - def test_subclass(self, mocker): - sentinel = object() - mocker.patch.dict( - datapoints._dataset_wrapper.WRAPPER_FACTORIES, - clear=False, - values={datasets.FakeData: lambda dataset: lambda idx, sample: sentinel}, - ) - - class MyFakeData(datasets.FakeData): - pass - - dataset = MyFakeData() - wrapped_dataset = datapoints.wrap_dataset_for_transforms_v2(dataset) - - assert wrapped_dataset[0] is sentinel From 085097e70c9892c803bebcb9757fd8081985eade Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 17 Feb 2023 10:31:34 +0100 Subject: [PATCH 02/10] port functional and consistency tests --- test/builtin_dataset_mocks.py | 3 +- test/common_utils.py | 611 ++++++++++++++++- test/datasets_utils.py | 17 - test/prototype_common_utils.py | 622 +----------------- test/test_datasets.py | 111 ++-- test/test_prototype_transforms.py | 4 +- ...y.py => test_transforms_v2_consistency.py} | 18 +- ...al.py => test_transforms_v2_functional.py} | 13 +- ...s_utils.py => test_transforms_v2_utils.py} | 2 +- ...s.py => transforms_v2_dispatcher_infos.py} | 6 +- ...infos.py => transforms_v2_kernel_infos.py} | 4 +- 11 files changed, 693 insertions(+), 718 deletions(-) rename test/{test_prototype_transforms_consistency.py => test_transforms_v2_consistency.py} (99%) rename test/{test_prototype_transforms_functional.py => test_transforms_v2_functional.py} (99%) rename test/{test_prototype_transforms_utils.py => test_transforms_v2_utils.py} (97%) rename test/{prototype_transforms_dispatcher_infos.py => transforms_v2_dispatcher_infos.py} (98%) rename test/{prototype_transforms_kernel_infos.py => transforms_v2_kernel_infos.py} (99%) diff --git a/test/builtin_dataset_mocks.py b/test/builtin_dataset_mocks.py index bbccec20856..ef5d5e1ec96 100644 --- a/test/builtin_dataset_mocks.py +++ 
b/test/builtin_dataset_mocks.py @@ -18,7 +18,8 @@ import numpy as np import pytest import torch -from datasets_utils import combinations_grid, create_image_file, create_image_folder, make_tar, make_zip +from common_utils import combinations_grid +from datasets_utils import create_image_file, create_image_folder, make_tar, make_zip from torch.nn.functional import one_hot from torch.testing import make_tensor as _make_tensor from torchvision.prototype import datasets diff --git a/test/common_utils.py b/test/common_utils.py index b76158b6c9c..670115c6ec0 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -1,16 +1,29 @@ +import collections.abc import contextlib +import dataclasses +import enum import functools +import itertools import os +import pathlib import random import shutil import tempfile +from collections import defaultdict +from typing import Callable, Sequence, Tuple, Union import numpy as np + +import PIL.Image +import pytest import torch +import torch.testing from PIL import Image -from torchvision import io -import __main__ # noqa: 401 +from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair +from torchvision import datapoints, io +from torchvision.transforms._functional_tensor import _max_value as get_max_value +from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) @@ -137,9 +150,6 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu return batch_tensor -assert_equal = functools.partial(torch.testing.assert_close, rtol=0, atol=0) - - def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None): names = [] for i in range(num_videos): @@ -160,6 +170,7 @@ def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None): def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None): + # FIXME: this is handled automatically by `assert_equal` below. Let's remove this in favor of it np_pil_image = np.array(pil_image) if np_pil_image.ndim == 2: np_pil_image = np_pil_image[:, :, None] @@ -172,6 +183,7 @@ def _assert_equal_tensor_to_pil(tensor, pil_image, msg=None): def _assert_approx_equal_tensor_to_pil( tensor, pil_image, tol=1e-5, msg=None, agg_method="mean", allowed_percentage_diff=None ): + # FIXME: this is handled automatically by `assert_close` below. Let's remove this in favor of it # TODO: we could just merge this into _assert_equal_tensor_to_pil np_pil_image = np.array(pil_image) if np_pil_image.ndim == 2: @@ -237,3 +249,592 @@ def wrapper(*args, **kwargs): return out return wrapper + + +def combinations_grid(**kwargs): + """Creates a grid of input combinations. + + Each element in the returned sequence is a dictionary containing one possible combination as values. 
+ + Example: + >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham")) + [ + {'foo': 'bar', 'spam': 'eggs'}, + {'foo': 'bar', 'spam': 'ham'}, + {'foo': 'baz', 'spam': 'eggs'}, + {'foo': 'baz', 'spam': 'ham'} + ] + """ + return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())] + + +class ImagePair(TensorLikePair): + def __init__( + self, + actual, + expected, + *, + mae=False, + **other_parameters, + ): + if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]): + actual, expected = [to_image_tensor(input) for input in [actual, expected]] + + super().__init__(actual, expected, **other_parameters) + self.mae = mae + + def compare(self) -> None: + actual, expected = self.actual, self.expected + + self._compare_attributes(actual, expected) + actual, expected = self._equalize_attributes(actual, expected) + + if self.mae: + actual, expected = self._promote_for_comparison(actual, expected) + mae = float(torch.abs(actual - expected).float().mean()) + if mae > self.atol: + self._fail( + AssertionError, + f"The MAE of the images is {mae}, but only {self.atol} is allowed.", + ) + else: + super()._compare_values(actual, expected) + + +def assert_close( + actual, + expected, + *, + allow_subclasses=True, + rtol=None, + atol=None, + equal_nan=False, + check_device=True, + check_dtype=True, + check_layout=True, + check_stride=False, + msg=None, + **kwargs, +): + """Superset of :func:`torch.testing.assert_close` with support for PIL vs. tensor image comparison""" + __tracebackhide__ = True + + error_metas = not_close_error_metas( + actual, + expected, + pair_types=( + NonePair, + BooleanPair, + NumberPair, + ImagePair, + TensorLikePair, + ), + allow_subclasses=allow_subclasses, + rtol=rtol, + atol=atol, + equal_nan=equal_nan, + check_device=check_device, + check_dtype=check_dtype, + check_layout=check_layout, + check_stride=check_stride, + **kwargs, + ) + + if error_metas: + raise error_metas[0].to_error(msg) + + +assert_equal = functools.partial(assert_close, rtol=0, atol=0) + + +def parametrized_error_message(*args, **kwargs): + def to_str(obj): + if isinstance(obj, torch.Tensor) and obj.numel() > 10: + return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})" + elif isinstance(obj, enum.Enum): + return f"{type(obj).__name__}.{obj.name}" + else: + return repr(obj) + + if args or kwargs: + postfix = "\n".join( + [ + "", + "Failure happened for the following parameters:", + "", + *[to_str(arg) for arg in args], + *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()], + ] + ) + else: + postfix = "" + + def wrapper(msg): + return msg + postfix + + return wrapper + + +class ArgsKwargs: + def __init__(self, *args, **kwargs): + self.args = args + self.kwargs = kwargs + + def __iter__(self): + yield self.args + yield self.kwargs + + def load(self, device="cpu"): + return ArgsKwargs( + *(arg.load(device) if isinstance(arg, TensorLoader) else arg for arg in self.args), + **{ + keyword: arg.load(device) if isinstance(arg, TensorLoader) else arg + for keyword, arg in self.kwargs.items() + }, + ) + + +DEFAULT_SQUARE_SPATIAL_SIZE = 15 +DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) +DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) +DEFAULT_SPATIAL_SIZES = ( + DEFAULT_LANDSCAPE_SPATIAL_SIZE, + DEFAULT_PORTRAIT_SPATIAL_SIZE, + DEFAULT_SQUARE_SPATIAL_SIZE, + "random", +) + + +def _parse_spatial_size(size, *, name="size"): + if size == "random": + return tuple(torch.randint(15, 33, (2,)).tolist()) + elif isinstance(size, int) and size > 0: + 
return (size, size) + elif ( + isinstance(size, collections.abc.Sequence) + and len(size) == 2 + and all(isinstance(length, int) and length > 0 for length in size) + ): + return tuple(size) + else: + raise pytest.UsageError( + f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers," + f"but got {size} instead." + ) + + +VALID_EXTRA_DIMS = ((), (4,), (2, 3)) +DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5)) + +DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS) + + +def from_loader(loader_fn): + def wrapper(*args, **kwargs): + device = kwargs.pop("device", "cpu") + loader = loader_fn(*args, **kwargs) + return loader.load(device) + + return wrapper + + +def from_loaders(loaders_fn): + def wrapper(*args, **kwargs): + device = kwargs.pop("device", "cpu") + loaders = loaders_fn(*args, **kwargs) + for loader in loaders: + yield loader.load(device) + + return wrapper + + +@dataclasses.dataclass +class TensorLoader: + fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor] + shape: Sequence[int] + dtype: torch.dtype + + def load(self, device): + return self.fn(self.shape, self.dtype, device) + + +@dataclasses.dataclass +class ImageLoader(TensorLoader): + spatial_size: Tuple[int, int] = dataclasses.field(init=False) + num_channels: int = dataclasses.field(init=False) + + def __post_init__(self): + self.spatial_size = self.shape[-2:] + self.num_channels = self.shape[-3] + + +NUM_CHANNELS_MAP = { + "GRAY": 1, + "GRAY_ALPHA": 2, + "RGB": 3, + "RGBA": 4, +} + + +def get_num_channels(color_space): + num_channels = NUM_CHANNELS_MAP.get(color_space) + if not num_channels: + raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}") + return num_channels + + +def make_image_loader( + size="random", + *, + color_space="RGB", + extra_dims=(), + dtype=torch.float32, + constant_alpha=True, +): + size = _parse_spatial_size(size) + num_channels = get_num_channels(color_space) + + def fn(shape, dtype, device): + max_value = get_max_value(dtype) + data = torch.testing.make_tensor(shape, low=0, high=max_value, dtype=dtype, device=device) + if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: + data[..., -1, :, :] = max_value + return datapoints.Image(data) + + return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype) + + +make_image = from_loader(make_image_loader) + + +def make_image_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + color_spaces=( + "GRAY", + "GRAY_ALPHA", + "RGB", + "RGBA", + ), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.float32, torch.float64, torch.uint8), + constant_alpha=True, +): + for params in combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes): + yield make_image_loader(**params, constant_alpha=constant_alpha) + + +make_images = from_loaders(make_image_loaders) + + +def make_image_loader_for_interpolation(size="random", *, color_space="RGB", dtype=torch.uint8): + size = _parse_spatial_size(size) + num_channels = get_num_channels(color_space) + + def fn(shape, dtype, device): + height, width = shape[-2:] + + image_pil = ( + PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg") + .resize((width, height)) + .convert( + { + "GRAY": "L", + "GRAY_ALPHA": "LA", + "RGB": "RGB", + "RGBA": "RGBA", + }[color_space] + ) + ) + + image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype) + + return datapoints.Image(image_tensor) 
+ + return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype) + + +def make_image_loaders_for_interpolation( + sizes=((233, 147),), + color_spaces=("RGB",), + dtypes=(torch.uint8,), +): + for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes): + yield make_image_loader_for_interpolation(**params) + + +@dataclasses.dataclass +class BoundingBoxLoader(TensorLoader): + format: datapoints.BoundingBoxFormat + spatial_size: Tuple[int, int] + + +def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): + low, high = torch.broadcast_tensors( + *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))] + ) + return torch.stack( + [ + torch.randint(low_scalar, high_scalar, (), **kwargs) + for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist()) + ] + ).reshape(low.shape) + + +def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + if format not in { + datapoints.BoundingBoxFormat.XYXY, + datapoints.BoundingBoxFormat.XYWH, + datapoints.BoundingBoxFormat.CXCYWH, + }: + raise pytest.UsageError(f"Can't make bounding box in format {format}") + + spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") + + def fn(shape, dtype, device): + *extra_dims, num_coordinates = shape + if num_coordinates != 4: + raise pytest.UsageError() + + if any(dim == 0 for dim in extra_dims): + return datapoints.BoundingBox( + torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + height, width = spatial_size + + if format == datapoints.BoundingBoxFormat.XYXY: + x1 = torch.randint(0, width // 2, extra_dims) + y1 = torch.randint(0, height // 2, extra_dims) + x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 + y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 + parts = (x1, y1, x2, y2) + elif format == datapoints.BoundingBoxFormat.XYWH: + x = torch.randint(0, width // 2, extra_dims) + y = torch.randint(0, height // 2, extra_dims) + w = randint_with_tensor_bounds(1, width - x) + h = randint_with_tensor_bounds(1, height - y) + parts = (x, y, w, h) + else: # format == features.BoundingBoxFormat.CXCYWH: + cx = torch.randint(1, width - 1, extra_dims) + cy = torch.randint(1, height - 1, extra_dims) + w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) + h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) + parts = (cx, cy, w, h) + + return datapoints.BoundingBox( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) + + +make_bounding_box = from_loader(make_bounding_box_loader) + + +def make_bounding_box_loaders( + *, + extra_dims=DEFAULT_EXTRA_DIMS, + formats=tuple(datapoints.BoundingBoxFormat), + spatial_size="random", + dtypes=(torch.float32, torch.float64, torch.int64), +): + for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): + yield make_bounding_box_loader(**params, spatial_size=spatial_size) + + +make_bounding_boxes = from_loaders(make_bounding_box_loaders) + + +class MaskLoader(TensorLoader): + pass + + +def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): + # This produces "detection" masks, i.e. 
`(*, N, H, W)`, where `N` denotes the number of objects + size = _parse_spatial_size(size) + num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects + + def fn(shape, dtype, device): + data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device) + return datapoints.Mask(data) + + return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) + + +make_detection_mask = from_loader(make_detection_mask_loader) + + +def make_detection_mask_loaders( + sizes=DEFAULT_SPATIAL_SIZES, + num_objects=(1, 0, "random"), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8,), +): + for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes): + yield make_detection_mask_loader(**params) + + +make_detection_masks = from_loaders(make_detection_mask_loaders) + + +def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): + # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values + size = _parse_spatial_size(size) + num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories + + def fn(shape, dtype, device): + data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device) + return datapoints.Mask(data) + + return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) + + +make_segmentation_mask = from_loader(make_segmentation_mask_loader) + + +def make_segmentation_mask_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + num_categories=(1, 2, "random"), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8,), +): + for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes): + yield make_segmentation_mask_loader(**params) + + +make_segmentation_masks = from_loaders(make_segmentation_mask_loaders) + + +def make_mask_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + num_objects=(1, 0, "random"), + num_categories=(1, 2, "random"), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8,), +): + yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes) + yield from make_segmentation_mask_loaders( + sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes + ) + + +make_masks = from_loaders(make_mask_loaders) + + +class VideoLoader(ImageLoader): + pass + + +def make_video_loader( + size="random", + *, + color_space="RGB", + num_frames="random", + extra_dims=(), + dtype=torch.uint8, +): + size = _parse_spatial_size(size) + num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames + + def fn(shape, dtype, device): + video = make_image(size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device) + return datapoints.Video(video) + + return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) + + +make_video = from_loader(make_video_loader) + + +def make_video_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + color_spaces=( + "GRAY", + "RGB", + ), + num_frames=(1, 0, "random"), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8, torch.float32, torch.float64), +): + for params in combinations_grid( + size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes + ): + yield make_video_loader(**params) + + +make_videos = from_loaders(make_video_loaders) + + +class TestMark: + def __init__( + self, + # Tuple 
of test class name and test function name that identifies the test the mark is applied to. If there is + # no test class, i.e. a standalone test function, use `None`. + test_id, + # `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail` + mark, + *, + # Callable, that will be passed an `ArgsKwargs` and should return a boolean to indicate if the mark will be + # applied. If omitted, defaults to always apply. + condition=None, + ): + self.test_id = test_id + self.mark = mark + self.condition = condition or (lambda args_kwargs: True) + + +def mark_framework_limitation(test_id, reason, condition=None): + # The purpose of this function is to have a single entry point for skip marks that are only there, because the test + # framework cannot handle the kernel in general or a specific parameter combination. + # As development progresses, we can change the `mark.skip` to `mark.xfail` from time to time to see if the skip is + # still justified. + # We don't want to use `mark.xfail` all the time, because that actually runs the test until an error happens. Thus, + # we are wasting CI resources for no reason for most of the time + return TestMark(test_id, pytest.mark.skip(reason=reason), condition=condition) + + +class InfoBase: + def __init__( + self, + *, + # Identifier if the info that shows up the parametrization. + id, + # Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization. + # See the `TestMark` class for details + test_marks=None, + # Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of `test_id` (see + # `TestMark`), the dtype, and the device. + closeness_kwargs=None, + ): + self.id = id + + self.test_marks = test_marks or [] + test_marks_map = defaultdict(list) + for test_mark in self.test_marks: + test_marks_map[test_mark.test_id].append(test_mark) + self._test_marks_map = dict(test_marks_map) + + self.closeness_kwargs = closeness_kwargs or dict() + + def get_marks(self, test_id, args_kwargs): + return [ + test_mark.mark for test_mark in self._test_marks_map.get(test_id, []) if test_mark.condition(args_kwargs) + ] + + def get_closeness_kwargs(self, test_id, *, dtype, device): + if not (isinstance(test_id, tuple) and len(test_id) == 2): + msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name" + if callable(test_id): + msg += ". Did you forget to add the `test_id` fixture to parameters of the test?" + else: + msg += f", but got {test_id} instead." + raise pytest.UsageError(msg) + if isinstance(device, torch.device): + device = device.type + return self.closeness_kwargs.get((test_id, dtype, device), dict()) diff --git a/test/datasets_utils.py b/test/datasets_utils.py index e8290b55c4b..312d9568056 100644 --- a/test/datasets_utils.py +++ b/test/datasets_utils.py @@ -170,23 +170,6 @@ def wrapper(self): return wrapper -def combinations_grid(**kwargs): - """Creates a grid of input combinations. - - Each element in the returned sequence is a dictionary containing one possible combination as values. - - Example: - >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham")) - [ - {'foo': 'bar', 'spam': 'eggs'}, - {'foo': 'bar', 'spam': 'ham'}, - {'foo': 'baz', 'spam': 'eggs'}, - {'foo': 'baz', 'spam': 'ham'} - ] - """ - return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())] - - class DatasetTestCase(unittest.TestCase): """Abstract base class for all dataset testcases. 
diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py index 33c390f9fe7..8259246c0cb 100644 --- a/test/prototype_common_utils.py +++ b/test/prototype_common_utils.py @@ -1,432 +1,14 @@ -"""This module is separated from common_utils.py to prevent the former to be dependent on torchvision.prototype""" - import collections.abc import dataclasses -import enum -import functools -import pathlib -from collections import defaultdict -from typing import Callable, Optional, Sequence, Tuple, Union +from typing import Optional, Sequence -import PIL.Image import pytest import torch -import torch.testing -import torchvision.prototype.datapoints as proto_datapoints -from datasets_utils import combinations_grid -from torch.nn.functional import one_hot -from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair -from torchvision import datapoints -from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor - -__all__ = [ - "assert_close", - "assert_equal", - "ArgsKwargs", - "VALID_EXTRA_DIMS", - "make_image_loaders", - "make_image", - "make_images", - "make_bounding_box_loaders", - "make_bounding_box", - "make_bounding_boxes", - "make_label", - "make_one_hot_labels", - "make_detection_mask_loaders", - "make_detection_mask", - "make_detection_masks", - "make_segmentation_mask_loaders", - "make_segmentation_mask", - "make_segmentation_masks", - "make_mask_loaders", - "make_masks", - "make_video", - "make_videos", - "TestMark", - "mark_framework_limitation", - "InfoBase", -] - - -class ImagePair(TensorLikePair): - def __init__( - self, - actual, - expected, - *, - mae=False, - **other_parameters, - ): - if all(isinstance(input, PIL.Image.Image) for input in [actual, expected]): - actual, expected = [to_image_tensor(input) for input in [actual, expected]] - - super().__init__(actual, expected, **other_parameters) - self.mae = mae - - def compare(self) -> None: - actual, expected = self.actual, self.expected - - self._compare_attributes(actual, expected) - actual, expected = self._equalize_attributes(actual, expected) - - if self.mae: - actual, expected = self._promote_for_comparison(actual, expected) - mae = float(torch.abs(actual - expected).float().mean()) - if mae > self.atol: - self._fail( - AssertionError, - f"The MAE of the images is {mae}, but only {self.atol} is allowed.", - ) - else: - super()._compare_values(actual, expected) - - -def assert_close( - actual, - expected, - *, - allow_subclasses=True, - rtol=None, - atol=None, - equal_nan=False, - check_device=True, - check_dtype=True, - check_layout=True, - check_stride=False, - msg=None, - **kwargs, -): - """Superset of :func:`torch.testing.assert_close` with support for PIL vs. 
tensor image comparison""" - __tracebackhide__ = True - - error_metas = not_close_error_metas( - actual, - expected, - pair_types=( - NonePair, - BooleanPair, - NumberPair, - ImagePair, - TensorLikePair, - ), - allow_subclasses=allow_subclasses, - rtol=rtol, - atol=atol, - equal_nan=equal_nan, - check_device=check_device, - check_dtype=check_dtype, - check_layout=check_layout, - check_stride=check_stride, - **kwargs, - ) - - if error_metas: - raise error_metas[0].to_error(msg) - - -assert_equal = functools.partial(assert_close, rtol=0, atol=0) - - -def parametrized_error_message(*args, **kwargs): - def to_str(obj): - if isinstance(obj, torch.Tensor) and obj.numel() > 10: - return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})" - elif isinstance(obj, enum.Enum): - return f"{type(obj).__name__}.{obj.name}" - else: - return repr(obj) - - if args or kwargs: - postfix = "\n".join( - [ - "", - "Failure happened for the following parameters:", - "", - *[to_str(arg) for arg in args], - *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()], - ] - ) - else: - postfix = "" - - def wrapper(msg): - return msg + postfix - - return wrapper - - -class ArgsKwargs: - def __init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - - def __iter__(self): - yield self.args - yield self.kwargs - - def load(self, device="cpu"): - return ArgsKwargs( - *(arg.load(device) if isinstance(arg, TensorLoader) else arg for arg in self.args), - **{ - keyword: arg.load(device) if isinstance(arg, TensorLoader) else arg - for keyword, arg in self.kwargs.items() - }, - ) - - -DEFAULT_SQUARE_SPATIAL_SIZE = 15 -DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) -DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) -DEFAULT_SPATIAL_SIZES = ( - DEFAULT_LANDSCAPE_SPATIAL_SIZE, - DEFAULT_PORTRAIT_SPATIAL_SIZE, - DEFAULT_SQUARE_SPATIAL_SIZE, - "random", -) - - -def _parse_spatial_size(size, *, name="size"): - if size == "random": - return tuple(torch.randint(15, 33, (2,)).tolist()) - elif isinstance(size, int) and size > 0: - return (size, size) - elif ( - isinstance(size, collections.abc.Sequence) - and len(size) == 2 - and all(isinstance(length, int) and length > 0 for length in size) - ): - return tuple(size) - else: - raise pytest.UsageError( - f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers," - f"but got {size} instead." 
- ) - - -VALID_EXTRA_DIMS = ((), (4,), (2, 3)) -DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5)) - -DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS) - - -def from_loader(loader_fn): - def wrapper(*args, **kwargs): - device = kwargs.pop("device", "cpu") - loader = loader_fn(*args, **kwargs) - return loader.load(device) - - return wrapper - - -def from_loaders(loaders_fn): - def wrapper(*args, **kwargs): - device = kwargs.pop("device", "cpu") - loaders = loaders_fn(*args, **kwargs) - for loader in loaders: - yield loader.load(device) - - return wrapper - - -@dataclasses.dataclass -class TensorLoader: - fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor] - shape: Sequence[int] - dtype: torch.dtype - - def load(self, device): - return self.fn(self.shape, self.dtype, device) - - -@dataclasses.dataclass -class ImageLoader(TensorLoader): - spatial_size: Tuple[int, int] = dataclasses.field(init=False) - num_channels: int = dataclasses.field(init=False) - - def __post_init__(self): - self.spatial_size = self.shape[-2:] - self.num_channels = self.shape[-3] - - -NUM_CHANNELS_MAP = { - "GRAY": 1, - "GRAY_ALPHA": 2, - "RGB": 3, - "RGBA": 4, -} - - -def get_num_channels(color_space): - num_channels = NUM_CHANNELS_MAP.get(color_space) - if not num_channels: - raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}") - return num_channels - - -def make_image_loader( - size="random", - *, - color_space="RGB", - extra_dims=(), - dtype=torch.float32, - constant_alpha=True, -): - size = _parse_spatial_size(size) - num_channels = get_num_channels(color_space) - - def fn(shape, dtype, device): - max_value = get_max_value(dtype) - data = torch.testing.make_tensor(shape, low=0, high=max_value, dtype=dtype, device=device) - if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: - data[..., -1, :, :] = max_value - return datapoints.Image(data) - - return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype) - - -make_image = from_loader(make_image_loader) - - -def make_image_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - color_spaces=( - "GRAY", - "GRAY_ALPHA", - "RGB", - "RGBA", - ), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.float32, torch.float64, torch.uint8), - constant_alpha=True, -): - for params in combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes): - yield make_image_loader(**params, constant_alpha=constant_alpha) - - -make_images = from_loaders(make_image_loaders) - - -def make_image_loader_for_interpolation(size="random", *, color_space="RGB", dtype=torch.uint8): - size = _parse_spatial_size(size) - num_channels = get_num_channels(color_space) - - def fn(shape, dtype, device): - height, width = shape[-2:] - - image_pil = ( - PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg") - .resize((width, height)) - .convert( - { - "GRAY": "L", - "GRAY_ALPHA": "LA", - "RGB": "RGB", - "RGBA": "RGBA", - }[color_space] - ) - ) - - image_tensor = convert_dtype_image_tensor(to_image_tensor(image_pil).to(device=device), dtype=dtype) - - return datapoints.Image(image_tensor) - - return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype) - - -def make_image_loaders_for_interpolation( - sizes=((233, 147),), - color_spaces=("RGB",), - dtypes=(torch.uint8,), -): - for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes): - yield make_image_loader_for_interpolation(**params) - - 
-@dataclasses.dataclass -class BoundingBoxLoader(TensorLoader): - format: datapoints.BoundingBoxFormat - spatial_size: Tuple[int, int] - - -def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): - low, high = torch.broadcast_tensors( - *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))] - ) - return torch.stack( - [ - torch.randint(low_scalar, high_scalar, (), **kwargs) - for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist()) - ] - ).reshape(low.shape) - - -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): - if isinstance(format, str): - format = datapoints.BoundingBoxFormat[format] - if format not in { - datapoints.BoundingBoxFormat.XYXY, - datapoints.BoundingBoxFormat.XYWH, - datapoints.BoundingBoxFormat.CXCYWH, - }: - raise pytest.UsageError(f"Can't make bounding box in format {format}") - - spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") - - def fn(shape, dtype, device): - *extra_dims, num_coordinates = shape - if num_coordinates != 4: - raise pytest.UsageError() - - if any(dim == 0 for dim in extra_dims): - return datapoints.BoundingBox( - torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size - ) - - height, width = spatial_size - - if format == datapoints.BoundingBoxFormat.XYXY: - x1 = torch.randint(0, width // 2, extra_dims) - y1 = torch.randint(0, height // 2, extra_dims) - x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 - y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 - parts = (x1, y1, x2, y2) - elif format == datapoints.BoundingBoxFormat.XYWH: - x = torch.randint(0, width // 2, extra_dims) - y = torch.randint(0, height // 2, extra_dims) - w = randint_with_tensor_bounds(1, width - x) - h = randint_with_tensor_bounds(1, height - y) - parts = (x, y, w, h) - else: # format == features.BoundingBoxFormat.CXCYWH: - cx = torch.randint(1, width - 1, extra_dims) - cy = torch.randint(1, height - 1, extra_dims) - w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) - h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) - parts = (cx, cy, w, h) - - return datapoints.BoundingBox( - torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size - ) - - return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) - - -make_bounding_box = from_loader(make_bounding_box_loader) - - -def make_bounding_box_loaders( - *, - extra_dims=DEFAULT_EXTRA_DIMS, - formats=tuple(datapoints.BoundingBoxFormat), - spatial_size="random", - dtypes=(torch.float32, torch.float64, torch.int64), -): - for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): - yield make_bounding_box_loader(**params, spatial_size=spatial_size) +from common_utils import combinations_grid, DEFAULT_EXTRA_DIMS, from_loader, from_loaders, TensorLoader +from torch.nn.functional import one_hot -make_bounding_boxes = from_loaders(make_bounding_box_loaders) +from torchvision.prototype import datapoints @dataclasses.dataclass @@ -458,7 +40,7 @@ def fn(shape, dtype, device): # The idiom `make_tensor(..., dtype=torch.int64).to(dtype)` is intentional to only get integer values, # regardless of the requested dtype, e.g. 
0 or 0.0 rather than 0 or 0.123 data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=torch.int64, device=device).to(dtype) - return proto_datapoints.Label(data, categories=categories) + return datapoints.Label(data, categories=categories) return LabelLoader(fn, shape=extra_dims, dtype=dtype, categories=categories) @@ -482,7 +64,7 @@ def fn(shape, dtype, device): # since `one_hot` only supports int64 label = make_label_loader(extra_dims=extra_dims, categories=num_categories, dtype=torch.int64).load(device) data = one_hot(label, num_classes=num_categories).to(dtype) - return proto_datapoints.OneHotLabel(data, categories=categories) + return datapoints.OneHotLabel(data, categories=categories) return OneHotLabelLoader(fn, shape=(*extra_dims, num_categories), dtype=dtype, categories=categories) @@ -498,195 +80,3 @@ def make_one_hot_label_loaders( make_one_hot_labels = from_loaders(make_one_hot_label_loaders) - - -class MaskLoader(TensorLoader): - pass - - -def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): - # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects - size = _parse_spatial_size(size) - num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects - - def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device) - return datapoints.Mask(data) - - return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) - - -make_detection_mask = from_loader(make_detection_mask_loader) - - -def make_detection_mask_loaders( - sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8,), -): - for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes): - yield make_detection_mask_loader(**params) - - -make_detection_masks = from_loaders(make_detection_mask_loaders) - - -def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): - # This produces "segmentation" masks, i.e. 
`(*, H, W)`, where the category is encoded in the values - size = _parse_spatial_size(size) - num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories - - def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device) - return datapoints.Mask(data) - - return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) - - -make_segmentation_mask = from_loader(make_segmentation_mask_loader) - - -def make_segmentation_mask_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - num_categories=(1, 2, "random"), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8,), -): - for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes): - yield make_segmentation_mask_loader(**params) - - -make_segmentation_masks = from_loaders(make_segmentation_mask_loaders) - - -def make_mask_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), - num_categories=(1, 2, "random"), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8,), -): - yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes) - yield from make_segmentation_mask_loaders( - sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes - ) - - -make_masks = from_loaders(make_mask_loaders) - - -class VideoLoader(ImageLoader): - pass - - -def make_video_loader( - size="random", - *, - color_space="RGB", - num_frames="random", - extra_dims=(), - dtype=torch.uint8, -): - size = _parse_spatial_size(size) - num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames - - def fn(shape, dtype, device): - video = make_image(size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device) - return datapoints.Video(video) - - return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) - - -make_video = from_loader(make_video_loader) - - -def make_video_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - color_spaces=( - "GRAY", - "RGB", - ), - num_frames=(1, 0, "random"), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8, torch.float32, torch.float64), -): - for params in combinations_grid( - size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes - ): - yield make_video_loader(**params) - - -make_videos = from_loaders(make_video_loaders) - - -class TestMark: - def __init__( - self, - # Tuple of test class name and test function name that identifies the test the mark is applied to. If there is - # no test class, i.e. a standalone test function, use `None`. - test_id, - # `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail` - mark, - *, - # Callable, that will be passed an `ArgsKwargs` and should return a boolean to indicate if the mark will be - # applied. If omitted, defaults to always apply. - condition=None, - ): - self.test_id = test_id - self.mark = mark - self.condition = condition or (lambda args_kwargs: True) - - -def mark_framework_limitation(test_id, reason, condition=None): - # The purpose of this function is to have a single entry point for skip marks that are only there, because the test - # framework cannot handle the kernel in general or a specific parameter combination. - # As development progresses, we can change the `mark.skip` to `mark.xfail` from time to time to see if the skip is - # still justified. 
- # We don't want to use `mark.xfail` all the time, because that actually runs the test until an error happens. Thus, - # we are wasting CI resources for no reason for most of the time - return TestMark(test_id, pytest.mark.skip(reason=reason), condition=condition) - - -class InfoBase: - def __init__( - self, - *, - # Identifier if the info that shows up the parametrization. - id, - # Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization. - # See the `TestMark` class for details - test_marks=None, - # Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of `test_id` (see - # `TestMark`), the dtype, and the device. - closeness_kwargs=None, - ): - self.id = id - - self.test_marks = test_marks or [] - test_marks_map = defaultdict(list) - for test_mark in self.test_marks: - test_marks_map[test_mark.test_id].append(test_mark) - self._test_marks_map = dict(test_marks_map) - - self.closeness_kwargs = closeness_kwargs or dict() - - def get_marks(self, test_id, args_kwargs): - return [ - test_mark.mark for test_mark in self._test_marks_map.get(test_id, []) if test_mark.condition(args_kwargs) - ] - - def get_closeness_kwargs(self, test_id, *, dtype, device): - if not (isinstance(test_id, tuple) and len(test_id) == 2): - msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name" - if callable(test_id): - msg += ". Did you forget to add the `test_id` fixture to parameters of the test?" - else: - msg += f", but got {test_id} instead." - raise pytest.UsageError(msg) - if isinstance(device, torch.device): - device = device.type - return self.closeness_kwargs.get((test_id, dtype, device), dict()) diff --git a/test/test_datasets.py b/test/test_datasets.py index 015f727a17a..6179c00b8e6 100644 --- a/test/test_datasets.py +++ b/test/test_datasets.py @@ -21,12 +21,13 @@ import pytest import torch import torch.nn.functional as F +from common_utils import combinations_grid from torchvision import datasets class STL10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.STL10 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test", "unlabeled", "train+unlabeled")) @staticmethod def _make_binary_file(num_elements, root, name): @@ -112,9 +113,7 @@ class Caltech101TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Caltech101 FEATURE_TYPES = (PIL.Image.Image, (int, np.ndarray, tuple)) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - target_type=("category", "annotation", ["category", "annotation"]) - ) + ADDITIONAL_CONFIGS = combinations_grid(target_type=("category", "annotation", ["category", "annotation"])) REQUIRED_PACKAGES = ("scipy",) def inject_fake_data(self, tmpdir, config): @@ -207,7 +206,7 @@ def inject_fake_data(self, tmpdir, config): class WIDERFaceTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.WIDERFace FEATURE_TYPES = (PIL.Image.Image, (dict, type(None))) # test split returns None as target - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) def inject_fake_data(self, tmpdir, config): widerface_dir = pathlib.Path(tmpdir) / "widerface" @@ -268,8 +267,8 @@ class CityScapesTestCase(datasets_utils.ImageDatasetTestCase): "color", ) ADDITIONAL_CONFIGS = ( - *datasets_utils.combinations_grid(mode=("fine",), 
split=("train", "test", "val"), target_type=TARGET_TYPES), - *datasets_utils.combinations_grid( + *combinations_grid(mode=("fine",), split=("train", "test", "val"), target_type=TARGET_TYPES), + *combinations_grid( mode=("coarse",), split=("train", "train_extra", "val"), target_type=TARGET_TYPES, @@ -386,7 +385,7 @@ def test_feature_types_target_polygon(self): class ImageNetTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.ImageNet REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val")) def inject_fake_data(self, tmpdir, config): tmpdir = pathlib.Path(tmpdir) @@ -416,7 +415,7 @@ def inject_fake_data(self, tmpdir, config): class CIFAR10TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.CIFAR10 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) _VERSION_CONFIG = dict( base_folder="cifar-10-batches-py", @@ -489,7 +488,7 @@ class CelebATestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.CelebA FEATURE_TYPES = (PIL.Image.Image, (torch.Tensor, int, tuple, type(None))) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("train", "valid", "test", "all"), target_type=("attr", "identity", "bbox", "landmarks", ["attr", "identity"]), ) @@ -613,9 +612,7 @@ class VOCSegmentationTestCase(datasets_utils.ImageDatasetTestCase): FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image) ADDITIONAL_CONFIGS = ( - *datasets_utils.combinations_grid( - year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval") - ), + *combinations_grid(year=[f"20{year:02d}" for year in range(7, 13)], image_set=("train", "val", "trainval")), dict(year="2007", image_set="test"), ) @@ -790,7 +787,7 @@ class CocoCaptionsTestCase(CocoDetectionTestCase): def _create_annotations(self, image_ids, num_annotations_per_image): captions = [str(idx) for idx in range(num_annotations_per_image)] - annotations = datasets_utils.combinations_grid(image_id=image_ids, caption=captions) + annotations = combinations_grid(image_id=image_ids, caption=captions) for id, annotation in enumerate(annotations): annotation["id"] = id return annotations, dict(captions=captions) @@ -804,7 +801,7 @@ def test_captions(self): class UCF101TestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.UCF101 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(fold=(1, 2, 3), train=(True, False)) _VIDEO_FOLDER = "videos" _ANNOTATIONS_FOLDER = "annotations" @@ -865,9 +862,7 @@ class LSUNTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.LSUN REQUIRED_PACKAGES = ("lmdb",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( - classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"]) - ) + ADDITIONAL_CONFIGS = combinations_grid(classes=("train", "test", "val", ["bedroom_train", "church_outdoor_train"])) _CATEGORIES = ( "bedroom", @@ -952,7 +947,7 @@ def test_not_found_or_corrupted(self): class KineticsTestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.Kinetics - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val"), num_classes=("400", "600", "700")) def 
inject_fake_data(self, tmpdir, config): classes = ("Abseiling", "Zumba") @@ -972,7 +967,7 @@ def inject_fake_data(self, tmpdir, config): class HMDB51TestCase(datasets_utils.VideoDatasetTestCase): DATASET_CLASS = datasets.HMDB51 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(fold=(1, 2, 3), train=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(fold=(1, 2, 3), train=(True, False)) _VIDEO_FOLDER = "videos" _SPLITS_FOLDER = "splits" @@ -1032,7 +1027,7 @@ def _create_split_files(self, root, video_files, fold, train): class OmniglotTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Omniglot - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(background=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(background=(True, False)) def inject_fake_data(self, tmpdir, config): target_folder = ( @@ -1112,7 +1107,7 @@ def inject_fake_data(self, tmpdir, config): class USPSTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.USPS - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) def inject_fake_data(self, tmpdir, config): num_images = 2 if config["train"] else 1 @@ -1134,7 +1129,7 @@ class SBDatasetTestCase(datasets_utils.ImageDatasetTestCase): REQUIRED_PACKAGES = ("scipy.io", "scipy.sparse") - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( image_set=("train", "val", "train_noval"), mode=("boundaries", "segmentation") ) @@ -1220,7 +1215,7 @@ class PhotoTourTestCase(datasets_utils.ImageDatasetTestCase): _TRAIN_FEATURE_TYPES = (torch.Tensor,) _TEST_FEATURE_TYPES = (torch.Tensor, torch.Tensor, torch.Tensor) - datasets_utils.combinations_grid(train=(True, False)) + combinations_grid(train=(True, False)) _NAME = "liberty" @@ -1379,7 +1374,7 @@ def _create_annotations_file(self, root, name, images, num_captions_per_image): class MNISTTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.MNIST - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) _MAGIC_DTYPES = { torch.uint8: 8, @@ -1449,7 +1444,7 @@ class EMNISTTestCase(MNISTTestCase): DATASET_CLASS = datasets.EMNIST DEFAULT_CONFIG = dict(split="byclass") - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("byclass", "bymerge", "balanced", "letters", "digits", "mnist"), train=(True, False) ) @@ -1460,7 +1455,7 @@ def _prefix(self, config): class QMNISTTestCase(MNISTTestCase): DATASET_CLASS = datasets.QMNIST - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(what=("train", "test", "test10k", "nist")) + ADDITIONAL_CONFIGS = combinations_grid(what=("train", "test", "test10k", "nist")) _LABELS_SIZE = (8,) _LABELS_DTYPE = torch.int32 @@ -1506,7 +1501,7 @@ class MovingMNISTTestCase(datasets_utils.DatasetTestCase): DATASET_CLASS = datasets.MovingMNIST FEATURE_TYPES = (torch.Tensor,) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=(None, "train", "test"), split_ratio=(10, 1, 19)) + ADDITIONAL_CONFIGS = combinations_grid(split=(None, "train", "test"), split_ratio=(10, 1, 19)) def inject_fake_data(self, tmpdir, config): base_folder = os.path.join(tmpdir, self.DATASET_CLASS.__name__) @@ -1542,7 +1537,7 @@ class DatasetFolderTestCase(datasets_utils.ImageDatasetTestCase): # We only iterate over different 'extensions' here and handle the tests for 'is_valid_file' in the # 'test_is_valid_file()' 
method. DEFAULT_CONFIG = dict(extensions=_EXTENSIONS) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(extensions=[(ext,) for ext in _EXTENSIONS]) + ADDITIONAL_CONFIGS = combinations_grid(extensions=[(ext,) for ext in _EXTENSIONS]) def dataset_args(self, tmpdir, config): return tmpdir, datasets.folder.pil_loader @@ -1611,7 +1606,7 @@ def test_classes(self, config): class KittiTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Kitti FEATURE_TYPES = (PIL.Image.Image, (list, type(None))) # test split returns None as target - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(train=(True, False)) + ADDITIONAL_CONFIGS = combinations_grid(train=(True, False)) def inject_fake_data(self, tmpdir, config): kitti_dir = os.path.join(tmpdir, "Kitti", "raw") @@ -1647,7 +1642,7 @@ def inject_fake_data(self, tmpdir, config): class SvhnTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.SVHN REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test", "extra")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test", "extra")) def inject_fake_data(self, tmpdir, config): import scipy.io as sio @@ -1668,7 +1663,7 @@ def inject_fake_data(self, tmpdir, config): class Places365TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Places365 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("train-standard", "train-challenge", "val"), small=(False, True), ) @@ -1760,7 +1755,7 @@ class INaturalistTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.INaturalist FEATURE_TYPES = (PIL.Image.Image, (int, tuple)) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( target_type=("kingdom", "full", "genus", ["kingdom", "phylum", "class", "order", "family", "genus", "full"]), version=("2021_train",), ) @@ -1797,7 +1792,7 @@ def test_targets(self): class LFWPeopleTestCase(datasets_utils.DatasetTestCase): DATASET_CLASS = datasets.LFWPeople FEATURE_TYPES = (PIL.Image.Image, int) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("10fold", "train", "test"), image_set=("original", "funneled", "deepfunneled") ) _IMAGES_DIR = {"original": "lfw", "funneled": "lfw_funneled", "deepfunneled": "lfw-deepfunneled"} @@ -1873,7 +1868,7 @@ def _inject_pairs(self, root, num_pairs, same): class SintelTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Sintel - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test"), pass_name=("clean", "final", "both")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test"), pass_name=("clean", "final", "both")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) FLOW_H, FLOW_W = 3, 4 @@ -1941,7 +1936,7 @@ def test_bad_input(self): class KittiFlowTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.KittiFlow - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): @@ -2001,7 +1996,7 @@ def test_bad_input(self): class FlyingChairsTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.FlyingChairs - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", 
"val")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) FLOW_H, FLOW_W = 3, 4 @@ -2056,7 +2051,7 @@ def test_flow(self, config): class FlyingThings3DTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.FlyingThings3D - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("train", "test"), pass_name=("clean", "final", "both"), camera=("left", "right", "both") ) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) @@ -2193,7 +2188,7 @@ class Food101TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Food101 FEATURE_TYPES = (PIL.Image.Image, int) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) def inject_fake_data(self, tmpdir: str, config): root_folder = pathlib.Path(tmpdir) / "food-101" @@ -2228,7 +2223,7 @@ def inject_fake_data(self, tmpdir: str, config): class FGVCAircraftTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.FGVCAircraft - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("train", "val", "trainval", "test"), annotation_level=("variant", "family", "manufacturer") ) @@ -2311,7 +2306,7 @@ class DTDTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.DTD FEATURE_TYPES = (PIL.Image.Image, int) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("train", "test", "val"), # There is no need to test the whole matrix here, since each fold is treated exactly the same partition=(1, 5, 10), @@ -2345,7 +2340,7 @@ def inject_fake_data(self, tmpdir: str, config): class FER2013TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.FER2013 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) @@ -2380,7 +2375,7 @@ class GTSRBTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.GTSRB FEATURE_TYPES = (PIL.Image.Image, int) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) def inject_fake_data(self, tmpdir: str, config): root_folder = os.path.join(tmpdir, "gtsrb") @@ -2430,7 +2425,7 @@ class CLEVRClassificationTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.CLEVRClassification FEATURE_TYPES = (PIL.Image.Image, (int, type(None))) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) def inject_fake_data(self, tmpdir, config): data_folder = pathlib.Path(tmpdir) / "clevr" / "CLEVR_v1.0" @@ -2462,7 +2457,7 @@ class OxfordIIITPetTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.OxfordIIITPet FEATURE_TYPES = (PIL.Image.Image, (int, PIL.Image.Image, tuple, type(None))) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("trainval", "test"), target_types=("category", "segmentation", ["category", "segmentation"], []), ) @@ -2521,7 +2516,7 @@ def _meta_to_split_and_classification_ann(self, meta, idx): class StanfordCarsTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = 
datasets.StanfordCars REQUIRED_PACKAGES = ("scipy",) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) def inject_fake_data(self, tmpdir, config): import scipy.io as io @@ -2565,7 +2560,7 @@ def inject_fake_data(self, tmpdir, config): class Country211TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Country211 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "valid", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "valid", "test")) def inject_fake_data(self, tmpdir: str, config): split_folder = pathlib.Path(tmpdir) / "country211" / config["split"] @@ -2592,7 +2587,7 @@ def inject_fake_data(self, tmpdir: str, config): class Flowers102TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Flowers102 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) REQUIRED_PACKAGES = ("scipy",) def inject_fake_data(self, tmpdir: str, config): @@ -2628,7 +2623,7 @@ def inject_fake_data(self, tmpdir: str, config): class PCAMTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.PCAM - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) REQUIRED_PACKAGES = ("h5py",) def inject_fake_data(self, tmpdir: str, config): @@ -2650,7 +2645,7 @@ def inject_fake_data(self, tmpdir: str, config): class RenderedSST2TestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.RenderedSST2 - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "val", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "val", "test")) SPLIT_TO_FOLDER = {"train": "train", "val": "valid", "test": "test"} def inject_fake_data(self, tmpdir: str, config): @@ -2672,7 +2667,7 @@ def inject_fake_data(self, tmpdir: str, config): class Kitti2012StereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Kitti2012Stereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): @@ -2734,7 +2729,7 @@ def test_bad_input(self): class Kitti2015StereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Kitti2015Stereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): @@ -2872,7 +2867,7 @@ def test_splits(self): class FallingThingsStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.FallingThingsStereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(variant=("single", "mixed", "both")) + ADDITIONAL_CONFIGS = combinations_grid(variant=("single", "mixed", "both")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) @staticmethod @@ -2946,7 +2941,7 @@ def test_bad_input(self): class SceneFlowStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.SceneFlowStereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + 
ADDITIONAL_CONFIGS = combinations_grid( variant=("FlyingThings3D", "Driving", "Monkaa"), pass_name=("clean", "final", "both") ) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) @@ -3033,7 +3028,7 @@ def test_bad_input(self): class InStereo2k(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.InStereo2k FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None))) - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) @staticmethod def _make_scene_folder(root: str, name: str, size: Tuple[int, int]): @@ -3075,7 +3070,7 @@ def test_bad_input(self): class SintelStereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.SintelStereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(pass_name=("final", "clean", "both")) + ADDITIONAL_CONFIGS = combinations_grid(pass_name=("final", "clean", "both")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) def inject_fake_data(self, tmpdir, config): @@ -3151,7 +3146,7 @@ def test_bad_input(self): class ETH3DStereoestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.ETH3DStereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid(split=("train", "test")) + ADDITIONAL_CONFIGS = combinations_grid(split=("train", "test")) FEATURE_TYPES = (PIL.Image.Image, PIL.Image.Image, (np.ndarray, type(None)), (np.ndarray, type(None))) @staticmethod @@ -3218,7 +3213,7 @@ def test_bad_input(self): class Middlebury2014StereoTestCase(datasets_utils.ImageDatasetTestCase): DATASET_CLASS = datasets.Middlebury2014Stereo - ADDITIONAL_CONFIGS = datasets_utils.combinations_grid( + ADDITIONAL_CONFIGS = combinations_grid( split=("train", "additional"), calibration=("perfect", "imperfect", "both"), use_ambient_views=(True, False), diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 8b1665a3d31..779ae6c07ae 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -15,9 +15,9 @@ import torchvision.transforms.v2 as transforms import torchvision.transforms.v2.utils -from common_utils import cpu_and_gpu -from prototype_common_utils import ( +from common_utils import ( assert_equal, + cpu_and_gpu, DEFAULT_EXTRA_DIMS, make_bounding_box, make_bounding_boxes, diff --git a/test/test_prototype_transforms_consistency.py b/test/test_transforms_v2_consistency.py similarity index 99% rename from test/test_prototype_transforms_consistency.py rename to test/test_transforms_v2_consistency.py index ebee2eec58f..df4d7a1a60c 100644 --- a/test/test_prototype_transforms_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -12,9 +12,8 @@ import pytest import torch -import torchvision.prototype.transforms as prototype_transforms import torchvision.transforms.v2 as v2_transforms -from prototype_common_utils import ( +from common_utils import ( ArgsKwargs, assert_close, assert_equal, @@ -22,7 +21,6 @@ make_detection_mask, make_image, make_images, - make_label, make_segmentation_mask, ) from torch import nn @@ -1056,6 +1054,9 @@ def make_datapoints(self, with_mask=True): size = (600, 800) num_objects = 22 + def make_label(extra_dims, categories): + return torch.randint(categories, extra_dims, dtype=torch.int64) + pil_image = to_image_pil(make_image(size=size, color_space="RGB")) target = { "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), 
dtype=torch.float), @@ -1102,11 +1103,12 @@ def make_datapoints(self, with_mask=True): ), (det_transforms.RandomZoomOut(), v2_transforms.RandomZoomOut(), {"with_mask": False}), (det_transforms.ScaleJitter((1024, 1024)), v2_transforms.ScaleJitter((1024, 1024)), {}), - ( - det_transforms.FixedSizeCrop((1024, 1024), fill=0), - prototype_transforms.FixedSizeCrop((1024, 1024), fill=0), - {}, - ), + # FIXME + # ( + # det_transforms.FixedSizeCrop((1024, 1024), fill=0), + # prototype_transforms.FixedSizeCrop((1024, 1024), fill=0), + # {}, + # ), ( det_transforms.RandomShortestSize( min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333 diff --git a/test/test_prototype_transforms_functional.py b/test/test_transforms_v2_functional.py similarity index 99% rename from test/test_prototype_transforms_functional.py rename to test/test_transforms_v2_functional.py index 7dff7a509ad..fbec70e6a9c 100644 --- a/test/test_prototype_transforms_functional.py +++ b/test/test_transforms_v2_functional.py @@ -11,15 +11,16 @@ import torch -from common_utils import cache, cpu_and_gpu, needs_cuda, set_rng_seed -from prototype_common_utils import ( +from common_utils import ( assert_close, + cache, + cpu_and_gpu, DEFAULT_SQUARE_SPATIAL_SIZE, make_bounding_boxes, + needs_cuda, parametrized_error_message, + set_rng_seed, ) -from prototype_transforms_dispatcher_infos import DISPATCHER_INFOS -from prototype_transforms_kernel_infos import KERNEL_INFOS from torch.utils._pytree import tree_map from torchvision import datapoints from torchvision.transforms.functional import _get_perspective_coeffs @@ -27,6 +28,8 @@ from torchvision.transforms.v2.functional._geometry import _center_crop_compute_padding from torchvision.transforms.v2.functional._meta import clamp_bounding_box, convert_format_bounding_box from torchvision.transforms.v2.utils import is_simple_tensor +from transforms_v2_dispatcher_infos import DISPATCHER_INFOS +from transforms_v2_kernel_infos import KERNEL_INFOS KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS} @@ -619,7 +622,7 @@ def test_datapoint_explicit_metadata(self): # TODO: All correctness checks below this line should be ported to be references on a `KernelInfo` in -# `prototype_transforms_kernel_infos.py` +# `transforms_v2_kernel_infos.py` def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_): diff --git a/test/test_prototype_transforms_utils.py b/test/test_transforms_v2_utils.py similarity index 97% rename from test/test_prototype_transforms_utils.py rename to test/test_transforms_v2_utils.py index c9d37466046..198ab39a475 100644 --- a/test/test_prototype_transforms_utils.py +++ b/test/test_transforms_v2_utils.py @@ -4,7 +4,7 @@ import torch import torchvision.transforms.v2.utils -from prototype_common_utils import make_bounding_box, make_detection_mask, make_image +from common_utils import make_bounding_box, make_detection_mask, make_image from torchvision import datapoints from torchvision.transforms.v2.functional import to_image_pil diff --git a/test/prototype_transforms_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py similarity index 98% rename from test/prototype_transforms_dispatcher_infos.py rename to test/transforms_v2_dispatcher_infos.py index 308f787ba6b..1d9dd025254 100644 --- a/test/prototype_transforms_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -2,9 +2,9 @@ import pytest import torchvision.transforms.v2.functional as F -from prototype_common_utils import InfoBase, TestMark -from 
prototype_transforms_kernel_infos import KERNEL_INFOS, pad_xfail_jit_fill_condition +from common_utils import InfoBase, TestMark from torchvision import datapoints +from transforms_v2_kernel_infos import KERNEL_INFOS, pad_xfail_jit_fill_condition __all__ = ["DispatcherInfo", "DISPATCHER_INFOS"] @@ -49,7 +49,7 @@ def __init__( if not kernel_info: raise pytest.UsageError( f"Can't register {kernel.__name__} for type {datapoint_type} since there is no `KernelInfo` for it. " - f"Please add a `KernelInfo` for it in `prototype_transforms_kernel_infos.py`." + f"Please add a `KernelInfo` for it in `transforms_v2_kernel_infos.py`." ) kernel_infos[datapoint_type] = kernel_info self.kernel_infos = kernel_infos diff --git a/test/prototype_transforms_kernel_infos.py b/test/transforms_v2_kernel_infos.py similarity index 99% rename from test/prototype_transforms_kernel_infos.py rename to test/transforms_v2_kernel_infos.py index 5e8be39aef6..a14ce27d38e 100644 --- a/test/prototype_transforms_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -9,9 +9,9 @@ import torch.testing import torchvision.ops import torchvision.transforms.v2.functional as F -from datasets_utils import combinations_grid -from prototype_common_utils import ( +from common_utils import ( ArgsKwargs, + combinations_grid, get_num_channels, ImageLoader, InfoBase, From da408f23307ef11569f1ec3a1af342a345abaa1c Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 17 Feb 2023 11:00:33 +0100 Subject: [PATCH 03/10] split transforms tests --- test/test_prototype_transforms.py | 2226 ++--------------------------- test/test_transforms_v2.py | 2032 ++++++++++++++++++++++++++ 2 files changed, 2177 insertions(+), 2081 deletions(-) create mode 100644 test/test_transforms_v2.py diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 779ae6c07ae..255c3b5c32f 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -1,61 +1,32 @@ import itertools -import pathlib -import random -import re -import warnings -from collections import defaultdict -import numpy as np +import re import PIL.Image import pytest import torch -import torchvision.prototype.datapoints as proto_datapoints -import torchvision.prototype.transforms as proto_transforms -import torchvision.transforms.v2 as transforms -import torchvision.transforms.v2.utils from common_utils import ( assert_equal, - cpu_and_gpu, DEFAULT_EXTRA_DIMS, make_bounding_box, - make_bounding_boxes, make_detection_mask, make_image, make_images, - make_label, - make_one_hot_labels, make_segmentation_mask, make_video, make_videos, ) -from torch.utils._pytree import tree_flatten, tree_unflatten -from torchvision import datapoints -from torchvision.ops.boxes import box_iou -from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image -from torchvision.transforms.v2 import functional as F -from torchvision.transforms.v2._utils import _convert_fill_arg -from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw - -BATCH_EXTRA_DIMS = [extra_dims for extra_dims in DEFAULT_EXTRA_DIMS if extra_dims] - - -def make_vanilla_tensor_images(*args, **kwargs): - for image in make_images(*args, **kwargs): - if image.ndim > 3: - continue - yield image.data - -def make_pil_images(*args, **kwargs): - for image in make_vanilla_tensor_images(*args, **kwargs): - yield to_pil_image(image) +from prototype_common_utils import make_label, make_one_hot_labels +from torchvision.datapoints import BoundingBox, 
BoundingBoxFormat, Image, Mask, Video +from torchvision.prototype import datapoints, transforms +from torchvision.transforms.v2._utils import _convert_fill_arg +from torchvision.transforms.v2.functional import InterpolationMode, pil_to_tensor, to_image_pil +from torchvision.transforms.v2.utils import check_type, is_simple_tensor -def make_vanilla_tensor_bounding_boxes(*args, **kwargs): - for bounding_box in make_bounding_boxes(*args, **kwargs): - yield bounding_box.data +BATCH_EXTRA_DIMS = [extra_dims for extra_dims in DEFAULT_EXTRA_DIMS if extra_dims] def parametrize(transforms_with_inputs): @@ -73,1526 +44,47 @@ def parametrize(transforms_with_inputs): ) -def auto_augment_adapter(transform, input, device): - adapted_input = {} - image_or_video_found = False - for key, value in input.items(): - if isinstance(value, (datapoints.BoundingBox, datapoints.Mask)): - # AA transforms don't support bounding boxes or masks - continue - elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor, PIL.Image.Image)): - if image_or_video_found: - # AA transforms only support a single image or video - continue - image_or_video_found = True - adapted_input[key] = value - return adapted_input - - -def linear_transformation_adapter(transform, input, device): - flat_inputs = list(input.values()) - c, h, w = query_chw( - [ - item - for item, needs_transform in zip(flat_inputs, transforms.Transform()._needs_transform_list(flat_inputs)) - if needs_transform - ] - ) - num_elements = c * h * w - transform.transformation_matrix = torch.randn((num_elements, num_elements), device=device) - transform.mean_vector = torch.randn((num_elements,), device=device) - return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)} - - -def normalize_adapter(transform, input, device): - adapted_input = {} - for key, value in input.items(): - if isinstance(value, PIL.Image.Image): - # normalize doesn't support PIL images - continue - elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor)): - # normalize doesn't support integer images - value = F.convert_dtype(value, torch.float32) - adapted_input[key] = value - return adapted_input - - -class TestSmoke: - @pytest.mark.parametrize( - ("transform", "adapter"), - [ - (transforms.RandomErasing(p=1.0), None), - (transforms.AugMix(), auto_augment_adapter), - (transforms.AutoAugment(), auto_augment_adapter), - (transforms.RandAugment(), auto_augment_adapter), - (transforms.TrivialAugmentWide(), auto_augment_adapter), - (transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.3, hue=0.15), None), - (transforms.Grayscale(), None), - (transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1.0), None), - (transforms.RandomAutocontrast(p=1.0), None), - (transforms.RandomEqualize(p=1.0), None), - (transforms.RandomGrayscale(p=1.0), None), - (transforms.RandomInvert(p=1.0), None), - (transforms.RandomPhotometricDistort(p=1.0), None), - (transforms.RandomPosterize(bits=4, p=1.0), None), - (transforms.RandomSolarize(threshold=0.5, p=1.0), None), - (transforms.CenterCrop([16, 16]), None), - (transforms.ElasticTransform(sigma=1.0), None), - (transforms.Pad(4), None), - (transforms.RandomAffine(degrees=30.0), None), - (transforms.RandomCrop([16, 16], pad_if_needed=True), None), - (transforms.RandomHorizontalFlip(p=1.0), None), - (transforms.RandomPerspective(p=1.0), None), - (transforms.RandomResize(min_size=10, max_size=20), None), - (transforms.RandomResizedCrop([16, 16]), None), - 
(transforms.RandomRotation(degrees=30), None), - (transforms.RandomShortestSize(min_size=10), None), - (transforms.RandomVerticalFlip(p=1.0), None), - (transforms.RandomZoomOut(p=1.0), None), - (transforms.Resize([16, 16], antialias=True), None), - (transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2)), None), - (transforms.ClampBoundingBox(), None), - (transforms.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.CXCYWH), None), - (transforms.ConvertDtype(), None), - (transforms.GaussianBlur(kernel_size=3), None), - ( - transforms.LinearTransformation( - # These are just dummy values that will be filled by the adapter. We can't define them upfront, - # because for we neither know the spatial size nor the device at this point - transformation_matrix=torch.empty((1, 1)), - mean_vector=torch.empty((1,)), - ), - linear_transformation_adapter, - ), - (transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), normalize_adapter), - (transforms.ToDtype(torch.float64), None), - (transforms.UniformTemporalSubsample(num_samples=2), None), - ], - ids=lambda transform: type(transform).__name__, - ) - @pytest.mark.parametrize("container_type", [dict, list, tuple]) - @pytest.mark.parametrize( - "image_or_video", - [ - make_image(), - make_video(), - next(make_pil_images(color_spaces=["RGB"])), - next(make_vanilla_tensor_images()), - ], - ) - @pytest.mark.parametrize("device", cpu_and_gpu()) - def test_common(self, transform, adapter, container_type, image_or_video, device): - spatial_size = F.get_spatial_size(image_or_video) - input = dict( - image_or_video=image_or_video, - image_datapoint=make_image(size=spatial_size), - video_datapoint=make_video(size=spatial_size), - image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), - bounding_box_xyxy=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,) - ), - bounding_box_xywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,) - ), - bounding_box_cxcywh=make_bounding_box( - format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,) - ), - bounding_box_degenerate_xyxy=datapoints.BoundingBox( - [ - [0, 0, 0, 0], # no height or width - [0, 0, 0, 1], # no height - [0, 0, 1, 0], # no width - [2, 0, 1, 1], # x1 > x2, y1 < y2 - [0, 2, 1, 1], # x1 < x2, y1 > y2 - [2, 2, 1, 1], # x1 > x2, y1 > y2 - ], - format=datapoints.BoundingBoxFormat.XYXY, - spatial_size=spatial_size, - ), - bounding_box_degenerate_xywh=datapoints.BoundingBox( - [ - [0, 0, 0, 0], # no height or width - [0, 0, 0, 1], # no height - [0, 0, 1, 0], # no width - [0, 0, 1, -1], # negative height - [0, 0, -1, 1], # negative width - [0, 0, -1, -1], # negative height and width - ], - format=datapoints.BoundingBoxFormat.XYWH, - spatial_size=spatial_size, - ), - bounding_box_degenerate_cxcywh=datapoints.BoundingBox( - [ - [0, 0, 0, 0], # no height or width - [0, 0, 0, 1], # no height - [0, 0, 1, 0], # no width - [0, 0, 1, -1], # negative height - [0, 0, -1, 1], # negative width - [0, 0, -1, -1], # negative height and width - ], - format=datapoints.BoundingBoxFormat.CXCYWH, - spatial_size=spatial_size, - ), - detection_mask=make_detection_mask(size=spatial_size), - segmentation_mask=make_segmentation_mask(size=spatial_size), - int=0, - float=0.0, - bool=True, - none=None, - str="str", - path=pathlib.Path.cwd(), - object=object(), - tensor=torch.empty(5), - array=np.empty(5), +@parametrize( + [ + ( + transform, + [ + 
dict(inpt=inpt, one_hot_label=one_hot_label) + for inpt, one_hot_label in itertools.product( + itertools.chain( + make_images(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]), + make_videos(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]), + ), + make_one_hot_labels(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]), + ) + ], ) - if adapter is not None: - input = adapter(transform, input, device) - - if container_type in {tuple, list}: - input = container_type(input.values()) - - input_flat, input_spec = tree_flatten(input) - input_flat = [item.to(device) if isinstance(item, torch.Tensor) else item for item in input_flat] - input = tree_unflatten(input_flat, input_spec) - - torch.manual_seed(0) - output = transform(input) - output_flat, output_spec = tree_flatten(output) - - assert output_spec == input_spec - - for output_item, input_item, should_be_transformed in zip( - output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat) - ): - if should_be_transformed: - assert type(output_item) is type(input_item) - else: - assert output_item is input_item - - @parametrize( - [ - ( - transform, - [ - dict(inpt=inpt, one_hot_label=one_hot_label) - for inpt, one_hot_label in itertools.product( - itertools.chain( - make_images(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]), - make_videos(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]), - ), - make_one_hot_labels(extra_dims=BATCH_EXTRA_DIMS, dtypes=[torch.float]), - ) - ], - ) - for transform in [ - proto_transforms.RandomMixup(alpha=1.0), - proto_transforms.RandomCutmix(alpha=1.0), - ] - ] - ) - def test_mixup_cutmix(self, transform, input): - transform(input) - - # add other data that should bypass and won't raise any error - input_copy = dict(input) - input_copy["path"] = "/path/to/somewhere" - input_copy["num"] = 1234 - transform(input_copy) - - # Check if we raise an error if sample contains bbox or mask or label - err_msg = "does not support PIL images, bounding boxes, masks and plain labels" - input_copy = dict(input) - for unsup_data in [ - make_label(), - make_bounding_box(format="XYXY"), - make_detection_mask(), - make_segmentation_mask(), - ]: - input_copy["unsupported"] = unsup_data - with pytest.raises(TypeError, match=err_msg): - transform(input_copy) - - @parametrize( - [ - ( - transform, - itertools.chain.from_iterable( - fn( - color_spaces=[ - "GRAY", - "RGB", - ], - dtypes=[torch.uint8], - extra_dims=[(), (4,)], - **(dict(num_frames=["random"]) if fn is make_videos else dict()), - ) - for fn in [ - make_images, - make_vanilla_tensor_images, - make_pil_images, - make_videos, - ] - ), - ) - for transform in ( - transforms.RandAugment(), - transforms.TrivialAugmentWide(), - transforms.AutoAugment(), - transforms.AugMix(), - ) - ] - ) - def test_auto_augment(self, transform, input): - transform(input) - - @parametrize( - [ - ( - transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]), - itertools.chain.from_iterable( - fn(color_spaces=["RGB"], dtypes=[torch.float32]) - for fn in [ - make_images, - make_vanilla_tensor_images, - make_videos, - ] - ), - ), + for transform in [ + transforms.RandomMixup(alpha=1.0), + transforms.RandomCutmix(alpha=1.0), ] - ) - def test_normalize(self, transform, input): - transform(input) - - @parametrize( - [ - ( - transforms.RandomResizedCrop([16, 16], antialias=True), - itertools.chain( - make_images(extra_dims=[(4,)]), - make_vanilla_tensor_images(), - make_pil_images(), - make_videos(extra_dims=[()]), - ), - ) - ] - ) - def test_random_resized_crop(self, 
transform, input): - transform(input) - - -@pytest.mark.parametrize( - "flat_inputs", - itertools.permutations( - [ - next(make_vanilla_tensor_images()), - next(make_vanilla_tensor_images()), - next(make_pil_images()), - make_image(), - next(make_videos()), - ], - 3, - ), + ] ) -def test_simple_tensor_heuristic(flat_inputs): - def split_on_simple_tensor(to_split): - # This takes a sequence that is structurally aligned with `flat_inputs` and splits its items into three parts: - # 1. The first simple tensor. If none is present, this will be `None` - # 2. A list of the remaining simple tensors - # 3. A list of all other items - simple_tensors = [] - others = [] - # Splitting always happens on the original `flat_inputs` to avoid any erroneous type changes by the transform to - # affect the splitting. - for item, inpt in zip(to_split, flat_inputs): - (simple_tensors if is_simple_tensor(inpt) else others).append(item) - return simple_tensors[0] if simple_tensors else None, simple_tensors[1:], others - - class CopyCloneTransform(transforms.Transform): - def _transform(self, inpt, params): - return inpt.clone() if isinstance(inpt, torch.Tensor) else inpt.copy() - - @staticmethod - def was_applied(output, inpt): - identity = output is inpt - if identity: - return False - - # Make sure nothing fishy is going on - assert_equal(output, inpt) - return True - - first_simple_tensor_input, other_simple_tensor_inputs, other_inputs = split_on_simple_tensor(flat_inputs) - - transform = CopyCloneTransform() - transformed_sample = transform(flat_inputs) - - first_simple_tensor_output, other_simple_tensor_outputs, other_outputs = split_on_simple_tensor(transformed_sample) - - if first_simple_tensor_input is not None: - if other_inputs: - assert not transform.was_applied(first_simple_tensor_output, first_simple_tensor_input) - else: - assert transform.was_applied(first_simple_tensor_output, first_simple_tensor_input) - - for output, inpt in zip(other_simple_tensor_outputs, other_simple_tensor_inputs): - assert not transform.was_applied(output, inpt) - - for input, output in zip(other_inputs, other_outputs): - assert transform.was_applied(output, input) - - -@pytest.mark.parametrize("p", [0.0, 1.0]) -class TestRandomHorizontalFlip: - def input_expected_image_tensor(self, p, dtype=torch.float32): - input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype) - expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype) - - return input, expected if p == 1 else input - - def test_simple_tensor(self, p): - input, expected = self.input_expected_image_tensor(p) - transform = transforms.RandomHorizontalFlip(p=p) - - actual = transform(input) - - assert_equal(expected, actual) - - def test_pil_image(self, p): - input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8) - transform = transforms.RandomHorizontalFlip(p=p) - - actual = transform(to_pil_image(input)) - - assert_equal(expected, pil_to_tensor(actual)) - - def test_datapoints_image(self, p): - input, expected = self.input_expected_image_tensor(p) - transform = transforms.RandomHorizontalFlip(p=p) - - actual = transform(datapoints.Image(input)) - - assert_equal(datapoints.Image(expected), actual) - - def test_datapoints_mask(self, p): - input, expected = self.input_expected_image_tensor(p) - transform = transforms.RandomHorizontalFlip(p=p) - - actual = transform(datapoints.Mask(input)) - - assert_equal(datapoints.Mask(expected), actual) - - def test_datapoints_bounding_box(self, p): - input = datapoints.BoundingBox([0, 0, 
5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)) - transform = transforms.RandomHorizontalFlip(p=p) - - actual = transform(input) - - expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input - expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor) - assert_equal(expected, actual) - assert actual.format == expected.format - assert actual.spatial_size == expected.spatial_size - - -@pytest.mark.parametrize("p", [0.0, 1.0]) -class TestRandomVerticalFlip: - def input_expected_image_tensor(self, p, dtype=torch.float32): - input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype) - expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype) - - return input, expected if p == 1 else input - - def test_simple_tensor(self, p): - input, expected = self.input_expected_image_tensor(p) - transform = transforms.RandomVerticalFlip(p=p) - - actual = transform(input) - - assert_equal(expected, actual) - - def test_pil_image(self, p): - input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8) - transform = transforms.RandomVerticalFlip(p=p) - - actual = transform(to_pil_image(input)) - - assert_equal(expected, pil_to_tensor(actual)) - - def test_datapoints_image(self, p): - input, expected = self.input_expected_image_tensor(p) - transform = transforms.RandomVerticalFlip(p=p) - - actual = transform(datapoints.Image(input)) - - assert_equal(datapoints.Image(expected), actual) - - def test_datapoints_mask(self, p): - input, expected = self.input_expected_image_tensor(p) - transform = transforms.RandomVerticalFlip(p=p) - - actual = transform(datapoints.Mask(input)) - - assert_equal(datapoints.Mask(expected), actual) - - def test_datapoints_bounding_box(self, p): - input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)) - transform = transforms.RandomVerticalFlip(p=p) - - actual = transform(input) - - expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input - expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor) - assert_equal(expected, actual) - assert actual.format == expected.format - assert actual.spatial_size == expected.spatial_size - - -class TestPad: - def test_assertions(self): - with pytest.raises(TypeError, match="Got inappropriate padding arg"): - transforms.Pad("abc") - - with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): - transforms.Pad([-0.7, 0, 0.7]) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.Pad(12, fill="abc") - - with pytest.raises(ValueError, match="Padding mode should be either"): - transforms.Pad(12, padding_mode="abc") - - @pytest.mark.parametrize("padding", [1, (1, 2), [1, 2, 3, 4]]) - @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) - @pytest.mark.parametrize("padding_mode", ["constant", "edge"]) - def test__transform(self, padding, fill, padding_mode, mocker): - transform = transforms.Pad(padding, fill=fill, padding_mode=padding_mode) - - fn = mocker.patch("torchvision.transforms.v2.functional.pad") - inpt = mocker.MagicMock(spec=datapoints.Image) - _ = transform(inpt) - - fill = transforms._utils._convert_fill_arg(fill) - if isinstance(padding, tuple): - padding = list(padding) - fn.assert_called_once_with(inpt, padding=padding, fill=fill, padding_mode=padding_mode) - - @pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}]) - def test__transform_image_mask(self, fill, mocker): - 
transform = transforms.Pad(1, fill=fill, padding_mode="constant") - - fn = mocker.patch("torchvision.transforms.v2.functional.pad") - image = datapoints.Image(torch.rand(3, 32, 32)) - mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32))) - inpt = [image, mask] - _ = transform(inpt) - - if isinstance(fill, int): - fill = transforms._utils._convert_fill_arg(fill) - calls = [ - mocker.call(image, padding=1, fill=fill, padding_mode="constant"), - mocker.call(mask, padding=1, fill=fill, padding_mode="constant"), - ] - else: - fill_img = transforms._utils._convert_fill_arg(fill[type(image)]) - fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)]) - calls = [ - mocker.call(image, padding=1, fill=fill_img, padding_mode="constant"), - mocker.call(mask, padding=1, fill=fill_mask, padding_mode="constant"), - ] - fn.assert_has_calls(calls) - - -class TestRandomZoomOut: - def test_assertions(self): - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.RandomZoomOut(fill="abc") - - with pytest.raises(TypeError, match="should be a sequence of length"): - transforms.RandomZoomOut(0, side_range=0) - - with pytest.raises(ValueError, match="Invalid canvas side range"): - transforms.RandomZoomOut(0, side_range=[4.0, 1.0]) - - @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) - @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]]) - def test__get_params(self, fill, side_range, mocker): - transform = transforms.RandomZoomOut(fill=fill, side_range=side_range) - - image = mocker.MagicMock(spec=datapoints.Image) - h, w = image.spatial_size = (24, 32) - - params = transform._get_params([image]) - - assert len(params["padding"]) == 4 - assert 0 <= params["padding"][0] <= (side_range[1] - 1) * w - assert 0 <= params["padding"][1] <= (side_range[1] - 1) * h - assert 0 <= params["padding"][2] <= (side_range[1] - 1) * w - assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h - - @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) - @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]]) - def test__transform(self, fill, side_range, mocker): - inpt = mocker.MagicMock(spec=datapoints.Image) - inpt.num_channels = 3 - inpt.spatial_size = (24, 32) - - transform = transforms.RandomZoomOut(fill=fill, side_range=side_range, p=1) - - fn = mocker.patch("torchvision.transforms.v2.functional.pad") - # vfdev-5, Feature Request: let's store params as Transform attribute - # This could be also helpful for users - # Otherwise, we can mock transform._get_params - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - torch.rand(1) # random apply changes random state - params = transform._get_params([inpt]) - - fill = transforms._utils._convert_fill_arg(fill) - fn.assert_called_once_with(inpt, **params, fill=fill) - - @pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}]) - def test__transform_image_mask(self, fill, mocker): - transform = transforms.RandomZoomOut(fill=fill, p=1.0) - - fn = mocker.patch("torchvision.transforms.v2.functional.pad") - image = datapoints.Image(torch.rand(3, 32, 32)) - mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32))) - inpt = [image, mask] - - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - torch.rand(1) # random apply changes random state - params = transform._get_params(inpt) - - if isinstance(fill, int): - fill = transforms._utils._convert_fill_arg(fill) - calls = [ - mocker.call(image, **params, fill=fill), - mocker.call(mask, **params, 
fill=fill), - ] - else: - fill_img = transforms._utils._convert_fill_arg(fill[type(image)]) - fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)]) - calls = [ - mocker.call(image, **params, fill=fill_img), - mocker.call(mask, **params, fill=fill_mask), - ] - fn.assert_has_calls(calls) - - -class TestRandomRotation: - def test_assertions(self): - with pytest.raises(ValueError, match="is a single number, it must be positive"): - transforms.RandomRotation(-0.7) - - for d in [[-0.7], [-0.7, 0, 0.7]]: - with pytest.raises(ValueError, match="degrees should be a sequence of length 2"): - transforms.RandomRotation(d) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.RandomRotation(12, fill="abc") - - with pytest.raises(TypeError, match="center should be a sequence of length"): - transforms.RandomRotation(12, center=12) - - with pytest.raises(ValueError, match="center should be a sequence of length"): - transforms.RandomRotation(12, center=[1, 2, 3]) - - def test__get_params(self): - angle_bound = 34 - transform = transforms.RandomRotation(angle_bound) - - params = transform._get_params(None) - assert -angle_bound <= params["angle"] <= angle_bound - - angle_bounds = [12, 34] - transform = transforms.RandomRotation(angle_bounds) - - params = transform._get_params(None) - assert angle_bounds[0] <= params["angle"] <= angle_bounds[1] - - @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)]) - @pytest.mark.parametrize("expand", [False, True]) - @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) - @pytest.mark.parametrize("center", [None, [2.0, 3.0]]) - def test__transform(self, degrees, expand, fill, center, mocker): - interpolation = InterpolationMode.BILINEAR - transform = transforms.RandomRotation( - degrees, interpolation=interpolation, expand=expand, fill=fill, center=center - ) - - if isinstance(degrees, (tuple, list)): - assert transform.degrees == [float(degrees[0]), float(degrees[1])] - else: - assert transform.degrees == [float(-degrees), float(degrees)] - - fn = mocker.patch("torchvision.transforms.v2.functional.rotate") - inpt = mocker.MagicMock(spec=datapoints.Image) - # vfdev-5, Feature Request: let's store params as Transform attribute - # This could be also helpful for users - # Otherwise, we can mock transform._get_params - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - params = transform._get_params(inpt) - - fill = transforms._utils._convert_fill_arg(fill) - fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center) - - @pytest.mark.parametrize("angle", [34, -87]) - @pytest.mark.parametrize("expand", [False, True]) - def test_boundingbox_spatial_size(self, angle, expand): - # Specific test for BoundingBox.rotate - bbox = datapoints.BoundingBox( - torch.tensor([1, 2, 3, 4]), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32) - ) - img = datapoints.Image(torch.rand(1, 3, 32, 32)) - - out_img = img.rotate(angle, expand=expand) - out_bbox = bbox.rotate(angle, expand=expand) - - assert out_img.spatial_size == out_bbox.spatial_size - - -class TestRandomAffine: - def test_assertions(self): - with pytest.raises(ValueError, match="is a single number, it must be positive"): - transforms.RandomAffine(-0.7) - - for d in [[-0.7], [-0.7, 0, 0.7]]: - with pytest.raises(ValueError, match="degrees should be a sequence of length 2"): - transforms.RandomAffine(d) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - 
transforms.RandomAffine(12, fill="abc") - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.RandomAffine(12, fill="abc") - - for kwargs in [ - {"center": 12}, - {"translate": 12}, - {"scale": 12}, - ]: - with pytest.raises(TypeError, match="should be a sequence of length"): - transforms.RandomAffine(12, **kwargs) - - for kwargs in [{"center": [1, 2, 3]}, {"translate": [1, 2, 3]}, {"scale": [1, 2, 3]}]: - with pytest.raises(ValueError, match="should be a sequence of length"): - transforms.RandomAffine(12, **kwargs) - - with pytest.raises(ValueError, match="translation values should be between 0 and 1"): - transforms.RandomAffine(12, translate=[-1.0, 2.0]) - - with pytest.raises(ValueError, match="scale values should be positive"): - transforms.RandomAffine(12, scale=[-1.0, 2.0]) - - with pytest.raises(ValueError, match="is a single number, it must be positive"): - transforms.RandomAffine(12, shear=-10) - - for s in [[-0.7], [-0.7, 0, 0.7]]: - with pytest.raises(ValueError, match="shear should be a sequence of length 2"): - transforms.RandomAffine(12, shear=s) - - @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)]) - @pytest.mark.parametrize("translate", [None, [0.1, 0.2]]) - @pytest.mark.parametrize("scale", [None, [0.7, 1.2]]) - @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]]) - def test__get_params(self, degrees, translate, scale, shear, mocker): - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 32) - h, w = image.spatial_size - - transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear) - params = transform._get_params([image]) - - if not isinstance(degrees, (list, tuple)): - assert -degrees <= params["angle"] <= degrees - else: - assert degrees[0] <= params["angle"] <= degrees[1] - - if translate is not None: - w_max = int(round(translate[0] * w)) - h_max = int(round(translate[1] * h)) - assert -w_max <= params["translate"][0] <= w_max - assert -h_max <= params["translate"][1] <= h_max - else: - assert params["translate"] == (0, 0) - - if scale is not None: - assert scale[0] <= params["scale"] <= scale[1] - else: - assert params["scale"] == 1.0 - - if shear is not None: - if isinstance(shear, float): - assert -shear <= params["shear"][0] <= shear - assert params["shear"][1] == 0.0 - elif len(shear) == 2: - assert shear[0] <= params["shear"][0] <= shear[1] - assert params["shear"][1] == 0.0 - else: - assert shear[0] <= params["shear"][0] <= shear[1] - assert shear[2] <= params["shear"][1] <= shear[3] - else: - assert params["shear"] == (0, 0) - - @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)]) - @pytest.mark.parametrize("translate", [None, [0.1, 0.2]]) - @pytest.mark.parametrize("scale", [None, [0.7, 1.2]]) - @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]]) - @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) - @pytest.mark.parametrize("center", [None, [2.0, 3.0]]) - def test__transform(self, degrees, translate, scale, shear, fill, center, mocker): - interpolation = InterpolationMode.BILINEAR - transform = transforms.RandomAffine( - degrees, - translate=translate, - scale=scale, - shear=shear, - interpolation=interpolation, - fill=fill, - center=center, - ) - - if isinstance(degrees, (tuple, list)): - assert transform.degrees == [float(degrees[0]), float(degrees[1])] - else: - assert transform.degrees == [float(-degrees), float(degrees)] - - fn = 
mocker.patch("torchvision.transforms.v2.functional.affine") - inpt = mocker.MagicMock(spec=datapoints.Image) - inpt.num_channels = 3 - inpt.spatial_size = (24, 32) - - # vfdev-5, Feature Request: let's store params as Transform attribute - # This could be also helpful for users - # Otherwise, we can mock transform._get_params - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - params = transform._get_params([inpt]) - - fill = transforms._utils._convert_fill_arg(fill) - fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center) - - -class TestRandomCrop: - def test_assertions(self): - with pytest.raises(ValueError, match="Please provide only two dimensions"): - transforms.RandomCrop([10, 12, 14]) - - with pytest.raises(TypeError, match="Got inappropriate padding arg"): - transforms.RandomCrop([10, 12], padding="abc") - - with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): - transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7]) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.RandomCrop([10, 12], padding=1, fill="abc") - - with pytest.raises(ValueError, match="Padding mode should be either"): - transforms.RandomCrop([10, 12], padding=1, padding_mode="abc") - - @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]]) - @pytest.mark.parametrize("size, pad_if_needed", [((10, 10), False), ((50, 25), True)]) - def test__get_params(self, padding, pad_if_needed, size, mocker): - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 32) - h, w = image.spatial_size - - transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed) - params = transform._get_params([image]) - - if padding is not None: - if isinstance(padding, int): - pad_top = pad_bottom = pad_left = pad_right = padding - elif isinstance(padding, list) and len(padding) == 2: - pad_left = pad_right = padding[0] - pad_top = pad_bottom = padding[1] - elif isinstance(padding, list) and len(padding) == 4: - pad_left, pad_top, pad_right, pad_bottom = padding - - h += pad_top + pad_bottom - w += pad_left + pad_right - else: - pad_left = pad_right = pad_top = pad_bottom = 0 - - if pad_if_needed: - if w < size[1]: - diff = size[1] - w - pad_left += diff - pad_right += diff - w += 2 * diff - if h < size[0]: - diff = size[0] - h - pad_top += diff - pad_bottom += diff - h += 2 * diff - - padding = [pad_left, pad_top, pad_right, pad_bottom] - - assert 0 <= params["top"] <= h - size[0] + 1 - assert 0 <= params["left"] <= w - size[1] + 1 - assert params["height"] == size[0] - assert params["width"] == size[1] - assert params["needs_pad"] is any(padding) - assert params["padding"] == padding - - @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]]) - @pytest.mark.parametrize("pad_if_needed", [False, True]) - @pytest.mark.parametrize("fill", [False, True]) - @pytest.mark.parametrize("padding_mode", ["constant", "edge"]) - def test__transform(self, padding, pad_if_needed, fill, padding_mode, mocker): - output_size = [10, 12] - transform = transforms.RandomCrop( - output_size, padding=padding, pad_if_needed=pad_if_needed, fill=fill, padding_mode=padding_mode - ) - - inpt = mocker.MagicMock(spec=datapoints.Image) - inpt.num_channels = 3 - inpt.spatial_size = (32, 32) - - expected = mocker.MagicMock(spec=datapoints.Image) - expected.num_channels = 3 - if isinstance(padding, int): - expected.spatial_size = (inpt.spatial_size[0] + padding, 
inpt.spatial_size[1] + padding) - elif isinstance(padding, list): - expected.spatial_size = ( - inpt.spatial_size[0] + sum(padding[0::2]), - inpt.spatial_size[1] + sum(padding[1::2]), - ) - else: - expected.spatial_size = inpt.spatial_size - _ = mocker.patch("torchvision.transforms.v2.functional.pad", return_value=expected) - fn_crop = mocker.patch("torchvision.transforms.v2.functional.crop") - - # vfdev-5, Feature Request: let's store params as Transform attribute - # This could be also helpful for users - # Otherwise, we can mock transform._get_params - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - params = transform._get_params([inpt]) - if padding is None and not pad_if_needed: - fn_crop.assert_called_once_with( - inpt, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1] - ) - elif not pad_if_needed: - fn_crop.assert_called_once_with( - expected, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1] - ) - elif padding is None: - # vfdev-5: I do not know how to mock and test this case - pass - else: - # vfdev-5: I do not know how to mock and test this case - pass - - -class TestGaussianBlur: - def test_assertions(self): - with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"): - transforms.GaussianBlur([10, 12, 14]) - - with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"): - transforms.GaussianBlur(4) - - with pytest.raises( - TypeError, match="sigma should be a single int or float or a list/tuple with length 2 floats." - ): - transforms.GaussianBlur(3, sigma=[1, 2, 3]) - - with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"): - transforms.GaussianBlur(3, sigma=-1.0) - - with pytest.raises(ValueError, match="sigma values should be positive and of the form"): - transforms.GaussianBlur(3, sigma=[2.0, 1.0]) - - @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]]) - def test__get_params(self, sigma): - transform = transforms.GaussianBlur(3, sigma=sigma) - params = transform._get_params([]) - - if isinstance(sigma, float): - assert params["sigma"][0] == params["sigma"][1] == 10 - else: - assert sigma[0] <= params["sigma"][0] <= sigma[1] - assert sigma[0] <= params["sigma"][1] <= sigma[1] - - @pytest.mark.parametrize("kernel_size", [3, [3, 5], (5, 3)]) - @pytest.mark.parametrize("sigma", [2.0, [2.0, 3.0]]) - def test__transform(self, kernel_size, sigma, mocker): - transform = transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma) - - if isinstance(kernel_size, (tuple, list)): - assert transform.kernel_size == kernel_size - else: - kernel_size = (kernel_size, kernel_size) - assert transform.kernel_size == kernel_size - - if isinstance(sigma, (tuple, list)): - assert transform.sigma == sigma - else: - assert transform.sigma == [sigma, sigma] - - fn = mocker.patch("torchvision.transforms.v2.functional.gaussian_blur") - inpt = mocker.MagicMock(spec=datapoints.Image) - inpt.num_channels = 3 - inpt.spatial_size = (24, 32) - - # vfdev-5, Feature Request: let's store params as Transform attribute - # This could be also helpful for users - # Otherwise, we can mock transform._get_params - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - params = transform._get_params([inpt]) - - fn.assert_called_once_with(inpt, kernel_size, **params) - - -class TestRandomColorOp: - @pytest.mark.parametrize("p", [0.0, 1.0]) - @pytest.mark.parametrize( - "transform_cls, func_op_name, kwargs", - 
[ - (transforms.RandomEqualize, "equalize", {}), - (transforms.RandomInvert, "invert", {}), - (transforms.RandomAutocontrast, "autocontrast", {}), - (transforms.RandomPosterize, "posterize", {"bits": 4}), - (transforms.RandomSolarize, "solarize", {"threshold": 0.5}), - (transforms.RandomAdjustSharpness, "adjust_sharpness", {"sharpness_factor": 0.5}), - ], - ) - def test__transform(self, p, transform_cls, func_op_name, kwargs, mocker): - transform = transform_cls(p=p, **kwargs) - - fn = mocker.patch(f"torchvision.transforms.v2.functional.{func_op_name}") - inpt = mocker.MagicMock(spec=datapoints.Image) - _ = transform(inpt) - if p > 0.0: - fn.assert_called_once_with(inpt, **kwargs) - else: - assert fn.call_count == 0 - - -class TestRandomPerspective: - def test_assertions(self): - with pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"): - transforms.RandomPerspective(distortion_scale=-1.0) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.RandomPerspective(0.5, fill="abc") - - def test__get_params(self, mocker): - dscale = 0.5 - transform = transforms.RandomPerspective(dscale) - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 32) - - params = transform._get_params([image]) - - h, w = image.spatial_size - assert "coefficients" in params - assert len(params["coefficients"]) == 8 - - @pytest.mark.parametrize("distortion_scale", [0.1, 0.7]) - def test__transform(self, distortion_scale, mocker): - interpolation = InterpolationMode.BILINEAR - fill = 12 - transform = transforms.RandomPerspective(distortion_scale, fill=fill, interpolation=interpolation) - - fn = mocker.patch("torchvision.transforms.v2.functional.perspective") - inpt = mocker.MagicMock(spec=datapoints.Image) - inpt.num_channels = 3 - inpt.spatial_size = (24, 32) - # vfdev-5, Feature Request: let's store params as Transform attribute - # This could be also helpful for users - # Otherwise, we can mock transform._get_params - torch.manual_seed(12) - _ = transform(inpt) - torch.manual_seed(12) - torch.rand(1) # random apply changes random state - params = transform._get_params([inpt]) - - fill = transforms._utils._convert_fill_arg(fill) - fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation) - - -class TestElasticTransform: - def test_assertions(self): - - with pytest.raises(TypeError, match="alpha should be float or a sequence of floats"): - transforms.ElasticTransform({}) - - with pytest.raises(ValueError, match="alpha is a sequence its length should be one of 2"): - transforms.ElasticTransform([1.0, 2.0, 3.0]) - - with pytest.raises(ValueError, match="alpha should be a sequence of floats"): - transforms.ElasticTransform([1, 2]) - - with pytest.raises(TypeError, match="sigma should be float or a sequence of floats"): - transforms.ElasticTransform(1.0, {}) - - with pytest.raises(ValueError, match="sigma is a sequence its length should be one of 2"): - transforms.ElasticTransform(1.0, [1.0, 2.0, 3.0]) - - with pytest.raises(ValueError, match="sigma should be a sequence of floats"): - transforms.ElasticTransform(1.0, [1, 2]) - - with pytest.raises(TypeError, match="Got inappropriate fill arg"): - transforms.ElasticTransform(1.0, 2.0, fill="abc") - - def test__get_params(self, mocker): - alpha = 2.0 - sigma = 3.0 - transform = transforms.ElasticTransform(alpha, sigma) - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 
32) - - params = transform._get_params([image]) - - h, w = image.spatial_size - displacement = params["displacement"] - assert displacement.shape == (1, h, w, 2) - assert (-alpha / w <= displacement[0, ..., 0]).all() and (displacement[0, ..., 0] <= alpha / w).all() - assert (-alpha / h <= displacement[0, ..., 1]).all() and (displacement[0, ..., 1] <= alpha / h).all() - - @pytest.mark.parametrize("alpha", [5.0, [5.0, 10.0]]) - @pytest.mark.parametrize("sigma", [2.0, [2.0, 5.0]]) - def test__transform(self, alpha, sigma, mocker): - interpolation = InterpolationMode.BILINEAR - fill = 12 - transform = transforms.ElasticTransform(alpha, sigma=sigma, fill=fill, interpolation=interpolation) - - if isinstance(alpha, float): - assert transform.alpha == [alpha, alpha] - else: - assert transform.alpha == alpha - - if isinstance(sigma, float): - assert transform.sigma == [sigma, sigma] - else: - assert transform.sigma == sigma - - fn = mocker.patch("torchvision.transforms.v2.functional.elastic") - inpt = mocker.MagicMock(spec=datapoints.Image) - inpt.num_channels = 3 - inpt.spatial_size = (24, 32) - - # Let's mock transform._get_params to control the output: - transform._get_params = mocker.MagicMock() - _ = transform(inpt) - params = transform._get_params([inpt]) - fill = transforms._utils._convert_fill_arg(fill) - fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation) - - -class TestRandomErasing: - def test_assertions(self, mocker): - with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"): - transforms.RandomErasing(value={}) - - with pytest.raises(ValueError, match="If value is str, it should be 'random'"): - transforms.RandomErasing(value="abc") - - with pytest.raises(TypeError, match="Scale should be a sequence"): - transforms.RandomErasing(scale=123) - - with pytest.raises(TypeError, match="Ratio should be a sequence"): - transforms.RandomErasing(ratio=123) - - with pytest.raises(ValueError, match="Scale should be between 0 and 1"): - transforms.RandomErasing(scale=[-1, 2]) - - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 32) - - transform = transforms.RandomErasing(value=[1, 2, 3, 4]) - - with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"): - transform._get_params([image]) - - @pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"]) - def test__get_params(self, value, mocker): - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 32) - - transform = transforms.RandomErasing(value=value) - params = transform._get_params([image]) - - v = params["v"] - h, w = params["h"], params["w"] - i, j = params["i"], params["j"] - assert isinstance(v, torch.Tensor) - if value == "random": - assert v.shape == (image.num_channels, h, w) - elif isinstance(value, (int, float)): - assert v.shape == (1, 1, 1) - elif isinstance(value, (list, tuple)): - assert v.shape == (image.num_channels, 1, 1) - - assert 0 <= i <= image.spatial_size[0] - h - assert 0 <= j <= image.spatial_size[1] - w - - @pytest.mark.parametrize("p", [0, 1]) - def test__transform(self, mocker, p): - transform = transforms.RandomErasing(p=p) - transform._transformed_types = (mocker.MagicMock,) - - i_sentinel = mocker.MagicMock() - j_sentinel = mocker.MagicMock() - h_sentinel = mocker.MagicMock() - w_sentinel = mocker.MagicMock() - v_sentinel = mocker.MagicMock() - mocker.patch( - 
"torchvision.transforms.v2._augment.RandomErasing._get_params", - return_value=dict(i=i_sentinel, j=j_sentinel, h=h_sentinel, w=w_sentinel, v=v_sentinel), - ) - - inpt_sentinel = mocker.MagicMock() - - mock = mocker.patch("torchvision.transforms.v2._augment.F.erase") - output = transform(inpt_sentinel) - - if p: - mock.assert_called_once_with( - inpt_sentinel, - i=i_sentinel, - j=j_sentinel, - h=h_sentinel, - w=w_sentinel, - v=v_sentinel, - inplace=transform.inplace, - ) - else: - mock.assert_not_called() - assert output is inpt_sentinel - - -class TestTransform: - @pytest.mark.parametrize( - "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], - ) - def test_check_transformed_types(self, inpt_type, mocker): - # This test ensures that we correctly handle which types to transform and which to bypass - t = transforms.Transform() - inpt = mocker.MagicMock(spec=inpt_type) - - if inpt_type in (np.ndarray, str, int): - output = t(inpt) - assert output is inpt - else: - with pytest.raises(NotImplementedError): - t(inpt) - - -class TestToImageTensor: - @pytest.mark.parametrize( - "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], - ) - def test__transform(self, inpt_type, mocker): - fn = mocker.patch( - "torchvision.transforms.v2.functional.to_image_tensor", - return_value=torch.rand(1, 3, 8, 8), - ) - - inpt = mocker.MagicMock(spec=inpt_type) - transform = transforms.ToImageTensor() - transform(inpt) - if inpt_type in (datapoints.BoundingBox, datapoints.Image, str, int): - assert fn.call_count == 0 - else: - fn.assert_called_once_with(inpt) - - -class TestToImagePIL: - @pytest.mark.parametrize( - "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], - ) - def test__transform(self, inpt_type, mocker): - fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil") - - inpt = mocker.MagicMock(spec=inpt_type) - transform = transforms.ToImagePIL() - transform(inpt) - if inpt_type in (datapoints.BoundingBox, PIL.Image.Image, str, int): - assert fn.call_count == 0 - else: - fn.assert_called_once_with(inpt, mode=transform.mode) - - -class TestToPILImage: - @pytest.mark.parametrize( - "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], - ) - def test__transform(self, inpt_type, mocker): - fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil") - - inpt = mocker.MagicMock(spec=inpt_type) - transform = transforms.ToPILImage() - transform(inpt) - if inpt_type in (PIL.Image.Image, datapoints.BoundingBox, str, int): - assert fn.call_count == 0 - else: - fn.assert_called_once_with(inpt, mode=transform.mode) - - -class TestToTensor: - @pytest.mark.parametrize( - "inpt_type", - [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], - ) - def test__transform(self, inpt_type, mocker): - fn = mocker.patch("torchvision.transforms.functional.to_tensor") - - inpt = mocker.MagicMock(spec=inpt_type) - with pytest.warns(UserWarning, match="deprecated and will be removed"): - transform = transforms.ToTensor() - transform(inpt) - if inpt_type in (datapoints.Image, torch.Tensor, datapoints.BoundingBox, str, int): - assert fn.call_count == 0 - else: - fn.assert_called_once_with(inpt) - - -class TestContainers: - @pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder]) - 
def test_assertions(self, transform_cls): - with pytest.raises(TypeError, match="Argument transforms should be a sequence of callables"): - transform_cls(transforms.RandomCrop(28)) - - @pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder]) - @pytest.mark.parametrize( - "trfms", - [ - [transforms.Pad(2), transforms.RandomCrop(28)], - [lambda x: 2.0 * x, transforms.Pad(2), transforms.RandomCrop(28)], - [transforms.Pad(2), lambda x: 2.0 * x, transforms.RandomCrop(28)], - ], - ) - def test_ctor(self, transform_cls, trfms): - c = transform_cls(trfms) - inpt = torch.rand(1, 3, 32, 32) - output = c(inpt) - assert isinstance(output, torch.Tensor) - assert output.ndim == 4 - - -class TestRandomChoice: - def test_assertions(self): - with pytest.warns(UserWarning, match="Argument p is deprecated and will be removed"): - transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], p=[1, 2]) - - with pytest.raises(ValueError, match="The number of probabilities doesn't match the number of transforms"): - transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], probabilities=[1]) - - -class TestRandomIoUCrop: - @pytest.mark.parametrize("device", cpu_and_gpu()) - @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]]) - def test__get_params(self, device, options, mocker): - image = mocker.MagicMock(spec=datapoints.Image) - image.num_channels = 3 - image.spatial_size = (24, 32) - bboxes = datapoints.BoundingBox( - torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]), - format="XYXY", - spatial_size=image.spatial_size, - device=device, - ) - sample = [image, bboxes] - - transform = transforms.RandomIoUCrop(sampler_options=options) - - n_samples = 5 - for _ in range(n_samples): - - params = transform._get_params(sample) - - if options == [2.0]: - assert len(params) == 0 - return - - assert len(params["is_within_crop_area"]) > 0 - assert params["is_within_crop_area"].dtype == torch.bool - - orig_h = image.spatial_size[0] - orig_w = image.spatial_size[1] - assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h) - assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w) - - left, top = params["left"], params["top"] - new_h, new_w = params["height"], params["width"] - ious = box_iou( - bboxes, - torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device), - ) - assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}" - - def test__transform_empty_params(self, mocker): - transform = transforms.RandomIoUCrop(sampler_options=[2.0]) - image = datapoints.Image(torch.rand(1, 3, 4, 4)) - bboxes = datapoints.BoundingBox(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4)) - label = proto_datapoints.Label(torch.tensor([1])) - sample = [image, bboxes, label] - # Let's mock transform._get_params to control the output: - transform._get_params = mocker.MagicMock(return_value={}) - output = transform(sample) - torch.testing.assert_close(output, sample) - - def test_forward_assertion(self): - transform = transforms.RandomIoUCrop() - with pytest.raises( - TypeError, - match="requires input sample to contain tensor or PIL images and bounding boxes", - ): - transform(torch.tensor(0)) - - def test__transform(self, mocker): - transform = transforms.RandomIoUCrop() - - image = datapoints.Image(torch.rand(3, 32, 24)) - bboxes = make_bounding_box(format="XYXY", 
spatial_size=(32, 24), extra_dims=(6,)) - masks = make_detection_mask((32, 24), num_objects=6) - - sample = [image, bboxes, masks] - - fn = mocker.patch("torchvision.transforms.v2.functional.crop", side_effect=lambda x, **params: x) - is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool) - - params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area) - transform._get_params = mocker.MagicMock(return_value=params) - output = transform(sample) - - assert fn.call_count == 3 - - expected_calls = [ - mocker.call(image, top=params["top"], left=params["left"], height=params["height"], width=params["width"]), - mocker.call(bboxes, top=params["top"], left=params["left"], height=params["height"], width=params["width"]), - mocker.call(masks, top=params["top"], left=params["left"], height=params["height"], width=params["width"]), - ] - - fn.assert_has_calls(expected_calls) - - # check number of bboxes vs number of labels: - output_bboxes = output[1] - assert isinstance(output_bboxes, datapoints.BoundingBox) - assert (output_bboxes[~is_within_crop_area] == 0).all() - - output_masks = output[2] - assert isinstance(output_masks, datapoints.Mask) - - -class TestScaleJitter: - def test__get_params(self, mocker): - spatial_size = (24, 32) - target_size = (16, 12) - scale_range = (0.5, 1.5) - - transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range) - sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size) - - n_samples = 5 - for _ in range(n_samples): - - params = transform._get_params([sample]) - - assert "size" in params - size = params["size"] - - assert isinstance(size, tuple) and len(size) == 2 - height, width = size - - r_min = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[0] - r_max = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[1] - - assert int(spatial_size[0] * r_min) <= height <= int(spatial_size[0] * r_max) - assert int(spatial_size[1] * r_min) <= width <= int(spatial_size[1] * r_max) - - def test__transform(self, mocker): - interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode) - antialias_sentinel = mocker.MagicMock() - - transform = transforms.ScaleJitter( - target_size=(16, 12), interpolation=interpolation_sentinel, antialias=antialias_sentinel - ) - transform._transformed_types = (mocker.MagicMock,) - - size_sentinel = mocker.MagicMock() - mocker.patch( - "torchvision.transforms.v2._geometry.ScaleJitter._get_params", return_value=dict(size=size_sentinel) - ) - - inpt_sentinel = mocker.MagicMock() - - mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize") - transform(inpt_sentinel) - - mock.assert_called_once_with( - inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel - ) - - -class TestRandomShortestSize: - @pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)]) - def test__get_params(self, min_size, max_size, mocker): - spatial_size = (3, 10) - - transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size) - - sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size) - params = transform._get_params([sample]) - - assert "size" in params - size = params["size"] - - assert isinstance(size, tuple) and len(size) == 2 - - longer = max(size) - shorter = min(size) - if max_size is not None: - assert longer <= max_size - assert shorter <= max_size - else: - 
assert shorter in min_size - - def test__transform(self, mocker): - interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode) - antialias_sentinel = mocker.MagicMock() - - transform = transforms.RandomShortestSize( - min_size=[3, 5, 7], max_size=12, interpolation=interpolation_sentinel, antialias=antialias_sentinel - ) - transform._transformed_types = (mocker.MagicMock,) - - size_sentinel = mocker.MagicMock() - mocker.patch( - "torchvision.transforms.v2._geometry.RandomShortestSize._get_params", - return_value=dict(size=size_sentinel), - ) - - inpt_sentinel = mocker.MagicMock() - - mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize") - transform(inpt_sentinel) - - mock.assert_called_once_with( - inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel - ) +def test_mixup_cutmix(transform, input): + transform(input) + + input_copy = dict(input) + input_copy["path"] = "/path/to/somewhere" + input_copy["num"] = 1234 + transform(input_copy) + + # Check if we raise an error if sample contains bbox or mask or label + err_msg = "does not support PIL images, bounding boxes, masks and plain labels" + input_copy = dict(input) + for unsup_data in [ + make_label(), + make_bounding_box(format="XYXY"), + make_detection_mask(), + make_segmentation_mask(), + ]: + input_copy["unsupported"] = unsup_data + with pytest.raises(TypeError, match=err_msg): + transform(input_copy) class TestSimpleCopyPaste: @@ -1602,27 +94,27 @@ def create_fake_image(self, mocker, image_type): return mocker.MagicMock(spec=image_type) def test__extract_image_targets_assertion(self, mocker): - transform = proto_transforms.SimpleCopyPaste() + transform = transforms.SimpleCopyPaste() flat_sample = [ # images, batch size = 2 - self.create_fake_image(mocker, datapoints.Image), + self.create_fake_image(mocker, Image), # labels, bboxes, masks - mocker.MagicMock(spec=proto_datapoints.Label), - mocker.MagicMock(spec=datapoints.BoundingBox), - mocker.MagicMock(spec=datapoints.Mask), + mocker.MagicMock(spec=datapoints.Label), + mocker.MagicMock(spec=BoundingBox), + mocker.MagicMock(spec=Mask), # labels, bboxes, masks - mocker.MagicMock(spec=datapoints.BoundingBox), - mocker.MagicMock(spec=datapoints.Mask), + mocker.MagicMock(spec=BoundingBox), + mocker.MagicMock(spec=Mask), ] with pytest.raises(TypeError, match="requires input sample to contain equal sized list of Images"): transform._extract_image_targets(flat_sample) - @pytest.mark.parametrize("image_type", [datapoints.Image, PIL.Image.Image, torch.Tensor]) - @pytest.mark.parametrize("label_type", [proto_datapoints.Label, proto_datapoints.OneHotLabel]) + @pytest.mark.parametrize("image_type", [Image, PIL.Image.Image, torch.Tensor]) + @pytest.mark.parametrize("label_type", [datapoints.Label, datapoints.OneHotLabel]) def test__extract_image_targets(self, image_type, label_type, mocker): - transform = proto_transforms.SimpleCopyPaste() + transform = transforms.SimpleCopyPaste() flat_sample = [ # images, batch size = 2 @@ -1630,12 +122,12 @@ def test__extract_image_targets(self, image_type, label_type, mocker): self.create_fake_image(mocker, image_type), # labels, bboxes, masks mocker.MagicMock(spec=label_type), - mocker.MagicMock(spec=datapoints.BoundingBox), - mocker.MagicMock(spec=datapoints.Mask), + mocker.MagicMock(spec=BoundingBox), + mocker.MagicMock(spec=Mask), # labels, bboxes, masks mocker.MagicMock(spec=label_type), - mocker.MagicMock(spec=datapoints.BoundingBox), - mocker.MagicMock(spec=datapoints.Mask), + 
mocker.MagicMock(spec=BoundingBox), + mocker.MagicMock(spec=Mask), ] images, targets = transform._extract_image_targets(flat_sample) @@ -1650,15 +142,15 @@ def test__extract_image_targets(self, image_type, label_type, mocker): for target in targets: for key, type_ in [ - ("boxes", datapoints.BoundingBox), - ("masks", datapoints.Mask), + ("boxes", BoundingBox), + ("masks", Mask), ("labels", label_type), ]: assert key in target assert isinstance(target[key], type_) assert target[key] in flat_sample - @pytest.mark.parametrize("label_type", [proto_datapoints.Label, proto_datapoints.OneHotLabel]) + @pytest.mark.parametrize("label_type", [datapoints.Label, datapoints.OneHotLabel]) def test__copy_paste(self, label_type): image = 2 * torch.ones(3, 32, 32) masks = torch.zeros(2, 32, 32) @@ -1668,13 +160,13 @@ def test__copy_paste(self, label_type): blending = True resize_interpolation = InterpolationMode.BILINEAR antialias = None - if label_type == proto_datapoints.OneHotLabel: + if label_type == datapoints.OneHotLabel: labels = torch.nn.functional.one_hot(labels, num_classes=5) target = { - "boxes": datapoints.BoundingBox( + "boxes": BoundingBox( torch.tensor([[2.0, 3.0, 8.0, 9.0], [20.0, 20.0, 30.0, 30.0]]), format="XYXY", spatial_size=(32, 32) ), - "masks": datapoints.Mask(masks), + "masks": Mask(masks), "labels": label_type(labels), } @@ -1683,17 +175,17 @@ def test__copy_paste(self, label_type): paste_masks[0, 13:19, 12:18] = 1 paste_masks[1, 15:19, 1:8] = 1 paste_labels = torch.tensor([3, 4]) - if label_type == proto_datapoints.OneHotLabel: + if label_type == datapoints.OneHotLabel: paste_labels = torch.nn.functional.one_hot(paste_labels, num_classes=5) paste_target = { - "boxes": datapoints.BoundingBox( + "boxes": BoundingBox( torch.tensor([[12.0, 13.0, 19.0, 18.0], [1.0, 15.0, 8.0, 19.0]]), format="XYXY", spatial_size=(32, 32) ), - "masks": datapoints.Mask(paste_masks), + "masks": Mask(paste_masks), "labels": label_type(paste_labels), } - transform = proto_transforms.SimpleCopyPaste() + transform = transforms.SimpleCopyPaste() random_selection = torch.tensor([0, 1]) output_image, output_target = transform._copy_paste( image, target, paste_image, paste_target, random_selection, blending, resize_interpolation, antialias @@ -1705,7 +197,7 @@ def test__copy_paste(self, label_type): torch.testing.assert_close(output_target["boxes"][2:, :], paste_target["boxes"]) expected_labels = torch.tensor([1, 2, 3, 4]) - if label_type == proto_datapoints.OneHotLabel: + if label_type == datapoints.OneHotLabel: expected_labels = torch.nn.functional.one_hot(expected_labels, num_classes=5) torch.testing.assert_close(output_target["labels"], label_type(expected_labels)) @@ -1720,13 +212,11 @@ def test__get_params(self, mocker): batch_shape = (10,) spatial_size = (11, 5) - transform = proto_transforms.FixedSizeCrop(size=crop_size) + transform = transforms.FixedSizeCrop(size=crop_size) flat_inputs = [ make_image(size=spatial_size, color_space="RGB"), - make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape - ), + make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -1748,7 +238,7 @@ def test__transform(self, mocker, needs): fill_sentinel = 12 padding_mode_sentinel = mocker.MagicMock() - transform = proto_transforms.FixedSizeCrop((-1, -1), fill=fill_sentinel, padding_mode=padding_mode_sentinel) + transform = transforms.FixedSizeCrop((-1, -1), fill=fill_sentinel, 
padding_mode=padding_mode_sentinel) transform._transformed_types = (mocker.MagicMock,) mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True) @@ -1822,12 +312,12 @@ def test__transform_culling(self, mocker): ) bounding_boxes = make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) + format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) ) masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,)) labels = make_label(extra_dims=(batch_size,)) - transform = proto_transforms.FixedSizeCrop((-1, -1)) + transform = transforms.FixedSizeCrop((-1, -1)) mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True) output = transform( @@ -1860,11 +350,11 @@ def test__transform_bounding_box_clamping(self, mocker): ) bounding_box = make_bounding_box( - format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) + format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,) ) mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box") - transform = proto_transforms.FixedSizeCrop((-1, -1)) + transform = transforms.FixedSizeCrop((-1, -1)) mocker.patch("torchvision.prototype.transforms._geometry.has_any", return_value=True) transform(bounding_box) @@ -1872,178 +362,48 @@ def test__transform_bounding_box_clamping(self, mocker): mock.assert_called_once() -class TestLinearTransformation: - def test_assertions(self): - with pytest.raises(ValueError, match="transformation_matrix should be square"): - transforms.LinearTransformation(torch.rand(2, 3), torch.rand(5)) - - with pytest.raises(ValueError, match="mean_vector should have the same length"): - transforms.LinearTransformation(torch.rand(3, 3), torch.rand(5)) - - @pytest.mark.parametrize( - "inpt", - [ - 122 * torch.ones(1, 3, 8, 8), - 122.0 * torch.ones(1, 3, 8, 8), - datapoints.Image(122 * torch.ones(1, 3, 8, 8)), - PIL.Image.new("RGB", (8, 8), (122, 122, 122)), - ], - ) - def test__transform(self, inpt): - - v = 121 * torch.ones(3 * 8 * 8) - m = torch.ones(3 * 8 * 8, 3 * 8 * 8) - transform = transforms.LinearTransformation(m, v) - - if isinstance(inpt, PIL.Image.Image): - with pytest.raises(TypeError, match="LinearTransformation does not work on PIL Images"): - transform(inpt) - else: - output = transform(inpt) - assert isinstance(output, torch.Tensor) - assert output.unique() == 3 * 8 * 8 - assert output.dtype == inpt.dtype - - class TestLabelToOneHot: def test__transform(self): categories = ["apple", "pear", "pineapple"] - labels = proto_datapoints.Label(torch.tensor([0, 1, 2, 1]), categories=categories) - transform = proto_transforms.LabelToOneHot() + labels = datapoints.Label(torch.tensor([0, 1, 2, 1]), categories=categories) + transform = transforms.LabelToOneHot() ohe_labels = transform(labels) - assert isinstance(ohe_labels, proto_datapoints.OneHotLabel) + assert isinstance(ohe_labels, datapoints.OneHotLabel) assert ohe_labels.shape == (4, 3) assert ohe_labels.categories == labels.categories == categories -class TestRandomResize: - def test__get_params(self): - min_size = 3 - max_size = 6 - - transform = transforms.RandomResize(min_size=min_size, max_size=max_size) - - for _ in range(10): - params = transform._get_params([]) - - assert isinstance(params["size"], list) and len(params["size"]) == 1 - size = params["size"][0] - - assert min_size <= size < max_size - - def test__transform(self, mocker): - 
interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode) - antialias_sentinel = mocker.MagicMock() - - transform = transforms.RandomResize( - min_size=-1, max_size=-1, interpolation=interpolation_sentinel, antialias=antialias_sentinel - ) - transform._transformed_types = (mocker.MagicMock,) - - size_sentinel = mocker.MagicMock() - mocker.patch( - "torchvision.transforms.v2._geometry.RandomResize._get_params", - return_value=dict(size=size_sentinel), - ) - - inpt_sentinel = mocker.MagicMock() - - mock_resize = mocker.patch("torchvision.transforms.v2._geometry.F.resize") - transform(inpt_sentinel) - - mock_resize.assert_called_with( - inpt_sentinel, size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel - ) - - -class TestToDtype: - @pytest.mark.parametrize( - ("dtype", "expected_dtypes"), - [ - ( - torch.float64, - { - datapoints.Video: torch.float64, - datapoints.Image: torch.float64, - datapoints.BoundingBox: torch.float64, - }, - ), - ( - {datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64}, - {datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64}, - ), - ], - ) - def test_call(self, dtype, expected_dtypes): - sample = dict( - video=make_video(dtype=torch.int64), - image=make_image(dtype=torch.uint8), - bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32), - str="str", - int=0, - ) - - transform = transforms.ToDtype(dtype) - transformed_sample = transform(sample) - - for key, value in sample.items(): - value_type = type(value) - transformed_value = transformed_sample[key] - - # make sure the transformation retains the type - assert isinstance(transformed_value, value_type) - - if isinstance(value, torch.Tensor): - assert transformed_value.dtype is expected_dtypes[value_type] - else: - assert transformed_value is value - - @pytest.mark.filterwarnings("error") - def test_plain_tensor_call(self): - tensor = torch.empty((), dtype=torch.float32) - transform = transforms.ToDtype({torch.Tensor: torch.float64}) - - assert transform(tensor).dtype is torch.float64 - - @pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video]) - def test_plain_tensor_warning(self, other_type): - with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")): - transforms.ToDtype(dtype={torch.Tensor: torch.float32, other_type: torch.float64}) - - class TestPermuteDimensions: @pytest.mark.parametrize( ("dims", "inverse_dims"), [ ( - {datapoints.Image: (2, 1, 0), datapoints.Video: None}, - {datapoints.Image: (2, 1, 0), datapoints.Video: None}, + {Image: (2, 1, 0), Video: None}, + {Image: (2, 1, 0), Video: None}, ), ( - {datapoints.Image: (2, 1, 0), datapoints.Video: (1, 2, 3, 0)}, - {datapoints.Image: (2, 1, 0), datapoints.Video: (3, 0, 1, 2)}, + {Image: (2, 1, 0), Video: (1, 2, 3, 0)}, + {Image: (2, 1, 0), Video: (3, 0, 1, 2)}, ), ], ) def test_call(self, dims, inverse_dims): sample = dict( image=make_image(), - bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY), + bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY), video=make_video(), str="str", int=0, ) - transform = proto_transforms.PermuteDimensions(dims) + transform = transforms.PermuteDimensions(dims) transformed_sample = transform(sample) for key, value in sample.items(): value_type = type(value) transformed_value = transformed_sample[key] - if check_type( - value, (datapoints.Image, 
torchvision.transforms.v2.utils.is_simple_tensor, datapoints.Video) - ): + if check_type(value, (Image, is_simple_tensor, Video)): if transform.dims.get(value_type) is not None: assert transformed_value.permute(inverse_dims[value_type]).equal(value) assert type(transformed_value) == torch.Tensor @@ -2053,14 +413,14 @@ def test_call(self, dims, inverse_dims): @pytest.mark.filterwarnings("error") def test_plain_tensor_call(self): tensor = torch.empty((2, 3, 4)) - transform = proto_transforms.PermuteDimensions(dims=(1, 2, 0)) + transform = transforms.PermuteDimensions(dims=(1, 2, 0)) assert transform(tensor).shape == (3, 4, 2) - @pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video]) + @pytest.mark.parametrize("other_type", [Image, Video]) def test_plain_tensor_warning(self, other_type): with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")): - proto_transforms.PermuteDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)}) + transforms.PermuteDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)}) class TestTransposeDimensions: @@ -2068,19 +428,19 @@ class TestTransposeDimensions: "dims", [ (-1, -2), - {datapoints.Image: (1, 2), datapoints.Video: None}, + {Image: (1, 2), Video: None}, ], ) def test_call(self, dims): sample = dict( image=make_image(), - bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY), + bounding_box=make_bounding_box(format=BoundingBoxFormat.XYXY), video=make_video(), str="str", int=0, ) - transform = proto_transforms.TransposeDimensions(dims) + transform = transforms.TransposeDimensions(dims) transformed_sample = transform(sample) for key, value in sample.items(): @@ -2088,9 +448,7 @@ def test_call(self, dims): transformed_value = transformed_sample[key] transposed_dims = transform.dims.get(value_type) - if check_type( - value, (datapoints.Image, torchvision.transforms.v2.utils.is_simple_tensor, datapoints.Video) - ): + if check_type(value, (Image, is_simple_tensor, Video)): if transposed_dims is not None: assert transformed_value.transpose(*transposed_dims).equal(value) assert type(transformed_value) == torch.Tensor @@ -2100,372 +458,78 @@ def test_call(self, dims): @pytest.mark.filterwarnings("error") def test_plain_tensor_call(self): tensor = torch.empty((2, 3, 4)) - transform = proto_transforms.TransposeDimensions(dims=(0, 2)) + transform = transforms.TransposeDimensions(dims=(0, 2)) assert transform(tensor).shape == (4, 3, 2) - @pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video]) + @pytest.mark.parametrize("other_type", [Image, Video]) def test_plain_tensor_warning(self, other_type): with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")): - proto_transforms.TransposeDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)}) - - -class TestUniformTemporalSubsample: - @pytest.mark.parametrize( - "inpt", - [ - torch.zeros(10, 3, 8, 8), - torch.zeros(1, 10, 3, 8, 8), - datapoints.Video(torch.zeros(1, 10, 3, 8, 8)), - ], - ) - def test__transform(self, inpt): - num_samples = 5 - transform = transforms.UniformTemporalSubsample(num_samples) - - output = transform(inpt) - assert type(output) is type(inpt) - assert output.shape[-4] == num_samples - assert output.dtype == inpt.dtype - - -# TODO: remove this test in 0.17 when the default of antialias changes to True -def test_antialias_warning(): - pil_img = PIL.Image.new("RGB", size=(10, 10), color=127) - tensor_img = torch.randint(0, 256, size=(3, 10, 10), dtype=torch.uint8) - 
tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8) - - match = "The default value of the antialias parameter" - with pytest.warns(UserWarning, match=match): - transforms.Resize((20, 20))(tensor_img) - with pytest.warns(UserWarning, match=match): - transforms.RandomResizedCrop((20, 20))(tensor_img) - with pytest.warns(UserWarning, match=match): - transforms.ScaleJitter((20, 20))(tensor_img) - with pytest.warns(UserWarning, match=match): - transforms.RandomShortestSize((20, 20))(tensor_img) - with pytest.warns(UserWarning, match=match): - transforms.RandomResize(10, 20)(tensor_img) - - with pytest.warns(UserWarning, match=match): - transforms.functional.resize(tensor_img, (20, 20)) - with pytest.warns(UserWarning, match=match): - transforms.functional.resize_image_tensor(tensor_img, (20, 20)) - - with pytest.warns(UserWarning, match=match): - transforms.functional.resize(tensor_video, (20, 20)) - with pytest.warns(UserWarning, match=match): - transforms.functional.resize_video(tensor_video, (20, 20)) - - with pytest.warns(UserWarning, match=match): - datapoints.Image(tensor_img).resize((20, 20)) - with pytest.warns(UserWarning, match=match): - datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20)) - - with pytest.warns(UserWarning, match=match): - datapoints.Video(tensor_video).resize((20, 20)) - with pytest.warns(UserWarning, match=match): - datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20)) - - with warnings.catch_warnings(): - warnings.simplefilter("error") - transforms.Resize((20, 20))(pil_img) - transforms.RandomResizedCrop((20, 20))(pil_img) - transforms.ScaleJitter((20, 20))(pil_img) - transforms.RandomShortestSize((20, 20))(pil_img) - transforms.RandomResize(10, 20)(pil_img) - transforms.functional.resize(pil_img, (20, 20)) - - transforms.Resize((20, 20), antialias=True)(tensor_img) - transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img) - transforms.ScaleJitter((20, 20), antialias=True)(tensor_img) - transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img) - transforms.RandomResize(10, 20, antialias=True)(tensor_img) - - transforms.functional.resize(tensor_img, (20, 20), antialias=True) - transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True) - transforms.functional.resize(tensor_video, (20, 20), antialias=True) - transforms.functional.resize_video(tensor_video, (20, 20), antialias=True) - - datapoints.Image(tensor_img).resize((20, 20), antialias=True) - datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True) - datapoints.Video(tensor_video).resize((20, 20), antialias=True) - datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True) - - -@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image)) -@pytest.mark.parametrize("label_type", (torch.Tensor, int)) -@pytest.mark.parametrize("dataset_return_type", (dict, tuple)) -@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor)) -def test_classif_preset(image_type, label_type, dataset_return_type, to_tensor): - - image = datapoints.Image(torch.randint(0, 256, size=(1, 3, 250, 250), dtype=torch.uint8)) - if image_type is PIL.Image: - image = to_pil_image(image[0]) - elif image_type is torch.Tensor: - image = image.as_subclass(torch.Tensor) - assert is_simple_tensor(image) - - label = 1 if label_type is int else torch.tensor([1]) - - if dataset_return_type is dict: - sample = { - "image": image, - "label": label, - } - else: - 
sample = image, label + transforms.TransposeDimensions(dims={torch.Tensor: (0, 1), other_type: (1, 0)}) - t = transforms.Compose( - [ - transforms.RandomResizedCrop((224, 224)), - transforms.RandomHorizontalFlip(p=1), - transforms.RandAugment(), - transforms.TrivialAugmentWide(), - transforms.AugMix(), - transforms.AutoAugment(), - to_tensor(), - # TODO: ConvertImageDtype is a pass-through on PIL images, is that - # intended? This results in a failure if we convert to tensor after - # it, because the image would still be uint8 which make Normalize - # fail. - transforms.ConvertImageDtype(torch.float), - transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1]), - transforms.RandomErasing(p=1), - ] - ) - out = t(sample) +import importlib.machinery +import importlib.util +from pathlib import Path - assert type(out) == type(sample) - if dataset_return_type is tuple: - out_image, out_label = out - else: - assert out.keys() == sample.keys() - out_image, out_label = out.values() +def import_transforms_from_references(reference): + HERE = Path(__file__).parent + PROJECT_ROOT = HERE.parent - assert out_image.shape[-2:] == (224, 224) - assert out_label == label - - -@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image)) -@pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite")) -@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor)) -@pytest.mark.parametrize("sanitize", (True, False)) -def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): - torch.manual_seed(0) - if data_augmentation == "hflip": - t = [ - transforms.RandomHorizontalFlip(p=1), - to_tensor(), - transforms.ConvertImageDtype(torch.float), - ] - elif data_augmentation == "lsj": - t = [ - transforms.ScaleJitter(target_size=(1024, 1024), antialias=True), - # Note: replaced FixedSizeCrop with RandomCrop, becuase we're - # leaving FixedSizeCrop in prototype for now, and it expects Label - # classes which we won't release yet. 
- # transforms.FixedSizeCrop( - # size=(1024, 1024), fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0}) - # ), - transforms.RandomCrop((1024, 1024), pad_if_needed=True), - transforms.RandomHorizontalFlip(p=1), - to_tensor(), - transforms.ConvertImageDtype(torch.float), - ] - elif data_augmentation == "multiscale": - t = [ - transforms.RandomShortestSize( - min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333, antialias=True - ), - transforms.RandomHorizontalFlip(p=1), - to_tensor(), - transforms.ConvertImageDtype(torch.float), - ] - elif data_augmentation == "ssd": - t = [ - transforms.RandomPhotometricDistort(p=1), - transforms.RandomZoomOut(fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0})), - transforms.RandomIoUCrop(), - transforms.RandomHorizontalFlip(p=1), - to_tensor(), - transforms.ConvertImageDtype(torch.float), - ] - elif data_augmentation == "ssdlite": - t = [ - transforms.RandomIoUCrop(), - transforms.RandomHorizontalFlip(p=1), - to_tensor(), - transforms.ConvertImageDtype(torch.float), - ] - if sanitize: - t += [transforms.SanitizeBoundingBoxes()] - t = transforms.Compose(t) - - num_boxes = 5 - H = W = 250 - - image = datapoints.Image(torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8)) - if image_type is PIL.Image: - image = to_pil_image(image[0]) - elif image_type is torch.Tensor: - image = image.as_subclass(torch.Tensor) - assert is_simple_tensor(image) - - label = torch.randint(0, 10, size=(num_boxes,)) - - boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4)) - boxes[:, 2:] += boxes[:, :2] - boxes = boxes.clamp(min=0, max=min(H, W)) - boxes = datapoints.BoundingBox(boxes, format="XYXY", spatial_size=(H, W)) - - masks = datapoints.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8)) - - sample = { - "image": image, - "label": label, - "boxes": boxes, - "masks": masks, - } - - out = t(sample) - - if to_tensor is transforms.ToTensor and image_type is not datapoints.Image: - assert is_simple_tensor(out["image"]) - else: - assert isinstance(out["image"], datapoints.Image) - assert isinstance(out["label"], type(sample["label"])) - - num_boxes_expected = { - # ssd and ssdlite contain RandomIoUCrop which may "remove" some bbox. It - # doesn't remove them strictly speaking, it just marks some boxes as - # degenerate and those boxes will be later removed by - # SanitizeBoundingBoxes(), which we add to the pipelines if the sanitize - # param is True. - # Note that the values below are probably specific to the random seed - # set above (which is fine). 
- (True, "ssd"): 4, - (True, "ssdlite"): 4, - }.get((sanitize, data_augmentation), num_boxes) - - assert out["boxes"].shape[0] == out["masks"].shape[0] == out["label"].shape[0] == num_boxes_expected - - -@pytest.mark.parametrize("min_size", (1, 10)) -@pytest.mark.parametrize( - "labels_getter", ("default", "labels", lambda inputs: inputs["labels"], None, lambda inputs: None) -) -def test_sanitize_bounding_boxes(min_size, labels_getter): - H, W = 256, 128 - - boxes_and_validity = [ - ([0, 1, 10, 1], False), # Y1 == Y2 - ([0, 1, 0, 20], False), # X1 == X2 - ([0, 0, min_size - 1, 10], False), # H < min_size - ([0, 0, 10, min_size - 1], False), # W < min_size - ([0, 0, 10, H + 1], False), # Y2 > H - ([0, 0, W + 1, 10], False), # X2 > W - ([-1, 1, 10, 20], False), # any < 0 - ([0, 0, -1, 20], False), # any < 0 - ([0, 0, -10, -1], False), # any < 0 - ([0, 0, min_size, 10], True), # H < min_size - ([0, 0, 10, min_size], True), # W < min_size - ([0, 0, W, H], True), # TODO: Is that actually OK?? Should it be -1? - ([1, 1, 30, 20], True), - ([0, 0, 10, 10], True), - ([1, 1, 30, 20], True), - ] - - random.shuffle(boxes_and_validity) # For test robustness: mix order of wrong and correct cases - boxes, is_valid_mask = zip(*boxes_and_validity) - valid_indices = [i for (i, is_valid) in enumerate(is_valid_mask) if is_valid] - - boxes = torch.tensor(boxes) - labels = torch.arange(boxes.shape[0]) - - boxes = datapoints.BoundingBox( - boxes, - format=datapoints.BoundingBoxFormat.XYXY, - spatial_size=(H, W), + loader = importlib.machinery.SourceFileLoader( + "transforms", str(PROJECT_ROOT / "references" / reference / "transforms.py") ) + spec = importlib.util.spec_from_loader("transforms", loader) + module = importlib.util.module_from_spec(spec) + loader.exec_module(module) + return module - masks = datapoints.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W))) - - sample = { - "image": torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8), - "labels": labels, - "boxes": boxes, - "whatever": torch.rand(10), - "None": None, - "masks": masks, - } - out = transforms.SanitizeBoundingBoxes(min_size=min_size, labels_getter=labels_getter)(sample) +det_transforms = import_transforms_from_references("detection") - assert out["image"] is sample["image"] - assert out["whatever"] is sample["whatever"] - if labels_getter is None or (callable(labels_getter) and labels_getter({"labels": "blah"}) is None): - assert out["labels"] is sample["labels"] - else: - assert isinstance(out["labels"], torch.Tensor) - assert out["boxes"].shape[0] == out["labels"].shape[0] == out["masks"].shape[0] - # This works because we conveniently set labels to arange(num_boxes) - assert out["labels"].tolist() == valid_indices +def test_fixed_sized_crop_against_detection_reference(): + def make_datapoints(): + size = (600, 800) + num_objects = 22 + pil_image = to_image_pil(make_image(size=size, color_space="RGB")) + target = { + "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "labels": make_label(extra_dims=(num_objects,), categories=80), + "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), + } -@pytest.mark.parametrize("key", ("labels", "LABELS", "LaBeL", "SOME_WEIRD_KEY_THAT_HAS_LABeL_IN_IT")) -def test_sanitize_bounding_boxes_default_heuristic(key): - labels = torch.arange(10) - d = {key: labels} - assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels - - if key.lower() != "labels": - # If "labels" is in the dict 
(case-insensitive), - # it takes precedence over other keys which would otherwise be a match - d = {key: "something_else", "labels": labels} - assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels - + yield (pil_image, target) -def test_sanitize_bounding_boxes_errors(): + tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) + target = { + "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "labels": make_label(extra_dims=(num_objects,), categories=80), + "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), + } - good_bbox = datapoints.BoundingBox( - [[0, 0, 10, 10]], - format=datapoints.BoundingBoxFormat.XYXY, - spatial_size=(20, 20), - ) + yield (tensor_image, target) - with pytest.raises(ValueError, match="min_size must be >= 1"): - transforms.SanitizeBoundingBoxes(min_size=0) - with pytest.raises(ValueError, match="labels_getter should either be a str"): - transforms.SanitizeBoundingBoxes(labels_getter=12) + datapoint_image = make_image(size=size, color_space="RGB") + target = { + "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float), + "labels": make_label(extra_dims=(num_objects,), categories=80), + "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), + } - with pytest.raises(ValueError, match="Could not infer where the labels are"): - bad_labels_key = {"bbox": good_bbox, "BAD_KEY": torch.arange(good_bbox.shape[0])} - transforms.SanitizeBoundingBoxes()(bad_labels_key) + yield (datapoint_image, target) - with pytest.raises(ValueError, match="If labels_getter is a str or 'default'"): - not_a_dict = (good_bbox, torch.arange(good_bbox.shape[0])) - transforms.SanitizeBoundingBoxes()(not_a_dict) + t = transforms.FixedSizeCrop((1024, 1024), fill=0) + t_ref = det_transforms.FixedSizeCrop((1024, 1024), fill=0) - with pytest.raises(ValueError, match="must be a tensor"): - not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()} - transforms.SanitizeBoundingBoxes()(not_a_tensor) + for dp in make_datapoints(): + # We should use prototype transform first as reference transform performs inplace target update + torch.manual_seed(12) + output = t(dp) - with pytest.raises(ValueError, match="Number of boxes"): - different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)} - transforms.SanitizeBoundingBoxes()(different_sizes) + torch.manual_seed(12) + expected_output = t_ref(*dp) - with pytest.raises(ValueError, match="boxes must be of shape"): - bad_bbox = datapoints.BoundingBox( # batch with 2 elements - [ - [[0, 0, 10, 10]], - [[0, 0, 10, 10]], - ], - format=datapoints.BoundingBoxFormat.XYXY, - spatial_size=(20, 20), - ) - different_sizes = {"bbox": bad_bbox, "labels": torch.arange(bad_bbox.shape[0])} - transforms.SanitizeBoundingBoxes()(different_sizes) + assert_equal(expected_output, output) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py new file mode 100644 index 00000000000..a792c94d428 --- /dev/null +++ b/test/test_transforms_v2.py @@ -0,0 +1,2032 @@ +import itertools +import pathlib +import random +import re +import warnings +from collections import defaultdict + +import numpy as np + +import PIL.Image +import pytest +import torch +import torchvision.transforms.v2 as transforms + +from common_utils import ( + assert_equal, + cpu_and_gpu, + make_bounding_box, + make_bounding_boxes, + 
make_detection_mask, + make_image, + make_images, + make_segmentation_mask, + make_video, + make_videos, +) +from torch.utils._pytree import tree_flatten, tree_unflatten +from torchvision import datapoints +from torchvision.ops.boxes import box_iou +from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image +from torchvision.transforms.v2 import functional as F +from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw + + +def make_vanilla_tensor_images(*args, **kwargs): + for image in make_images(*args, **kwargs): + if image.ndim > 3: + continue + yield image.data + + +def make_pil_images(*args, **kwargs): + for image in make_vanilla_tensor_images(*args, **kwargs): + yield to_pil_image(image) + + +def make_vanilla_tensor_bounding_boxes(*args, **kwargs): + for bounding_box in make_bounding_boxes(*args, **kwargs): + yield bounding_box.data + + +def parametrize(transforms_with_inputs): + return pytest.mark.parametrize( + ("transform", "input"), + [ + pytest.param( + transform, + input, + id=f"{type(transform).__name__}-{type(input).__module__}.{type(input).__name__}-{idx}", + ) + for transform, inputs in transforms_with_inputs + for idx, input in enumerate(inputs) + ], + ) + + +def auto_augment_adapter(transform, input, device): + adapted_input = {} + image_or_video_found = False + for key, value in input.items(): + if isinstance(value, (datapoints.BoundingBox, datapoints.Mask)): + # AA transforms don't support bounding boxes or masks + continue + elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor, PIL.Image.Image)): + if image_or_video_found: + # AA transforms only support a single image or video + continue + image_or_video_found = True + adapted_input[key] = value + return adapted_input + + +def linear_transformation_adapter(transform, input, device): + flat_inputs = list(input.values()) + c, h, w = query_chw( + [ + item + for item, needs_transform in zip(flat_inputs, transforms.Transform()._needs_transform_list(flat_inputs)) + if needs_transform + ] + ) + num_elements = c * h * w + transform.transformation_matrix = torch.randn((num_elements, num_elements), device=device) + transform.mean_vector = torch.randn((num_elements,), device=device) + return {key: value for key, value in input.items() if not isinstance(value, PIL.Image.Image)} + + +def normalize_adapter(transform, input, device): + adapted_input = {} + for key, value in input.items(): + if isinstance(value, PIL.Image.Image): + # normalize doesn't support PIL images + continue + elif check_type(value, (datapoints.Image, datapoints.Video, is_simple_tensor)): + # normalize doesn't support integer images + value = F.convert_dtype(value, torch.float32) + adapted_input[key] = value + return adapted_input + + +class TestSmoke: + @pytest.mark.parametrize( + ("transform", "adapter"), + [ + (transforms.RandomErasing(p=1.0), None), + (transforms.AugMix(), auto_augment_adapter), + (transforms.AutoAugment(), auto_augment_adapter), + (transforms.RandAugment(), auto_augment_adapter), + (transforms.TrivialAugmentWide(), auto_augment_adapter), + (transforms.ColorJitter(brightness=0.1, contrast=0.2, saturation=0.3, hue=0.15), None), + (transforms.Grayscale(), None), + (transforms.RandomAdjustSharpness(sharpness_factor=0.5, p=1.0), None), + (transforms.RandomAutocontrast(p=1.0), None), + (transforms.RandomEqualize(p=1.0), None), + (transforms.RandomGrayscale(p=1.0), None), + (transforms.RandomInvert(p=1.0), None), + (transforms.RandomPhotometricDistort(p=1.0), 
None), + (transforms.RandomPosterize(bits=4, p=1.0), None), + (transforms.RandomSolarize(threshold=0.5, p=1.0), None), + (transforms.CenterCrop([16, 16]), None), + (transforms.ElasticTransform(sigma=1.0), None), + (transforms.Pad(4), None), + (transforms.RandomAffine(degrees=30.0), None), + (transforms.RandomCrop([16, 16], pad_if_needed=True), None), + (transforms.RandomHorizontalFlip(p=1.0), None), + (transforms.RandomPerspective(p=1.0), None), + (transforms.RandomResize(min_size=10, max_size=20), None), + (transforms.RandomResizedCrop([16, 16]), None), + (transforms.RandomRotation(degrees=30), None), + (transforms.RandomShortestSize(min_size=10), None), + (transforms.RandomVerticalFlip(p=1.0), None), + (transforms.RandomZoomOut(p=1.0), None), + (transforms.Resize([16, 16], antialias=True), None), + (transforms.ScaleJitter((16, 16), scale_range=(0.8, 1.2)), None), + (transforms.ClampBoundingBox(), None), + (transforms.ConvertBoundingBoxFormat(datapoints.BoundingBoxFormat.CXCYWH), None), + (transforms.ConvertDtype(), None), + (transforms.GaussianBlur(kernel_size=3), None), + ( + transforms.LinearTransformation( + # These are just dummy values that will be filled by the adapter. We can't define them upfront, + # because for we neither know the spatial size nor the device at this point + transformation_matrix=torch.empty((1, 1)), + mean_vector=torch.empty((1,)), + ), + linear_transformation_adapter, + ), + (transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), normalize_adapter), + (transforms.ToDtype(torch.float64), None), + (transforms.UniformTemporalSubsample(num_samples=2), None), + ], + ids=lambda transform: type(transform).__name__, + ) + @pytest.mark.parametrize("container_type", [dict, list, tuple]) + @pytest.mark.parametrize( + "image_or_video", + [ + make_image(), + make_video(), + next(make_pil_images(color_spaces=["RGB"])), + next(make_vanilla_tensor_images()), + ], + ) + @pytest.mark.parametrize("device", cpu_and_gpu()) + def test_common(self, transform, adapter, container_type, image_or_video, device): + spatial_size = F.get_spatial_size(image_or_video) + input = dict( + image_or_video=image_or_video, + image_datapoint=make_image(size=spatial_size), + video_datapoint=make_video(size=spatial_size), + image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])), + bounding_box_xyxy=make_bounding_box( + format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,) + ), + bounding_box_xywh=make_bounding_box( + format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,) + ), + bounding_box_cxcywh=make_bounding_box( + format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,) + ), + bounding_box_degenerate_xyxy=datapoints.BoundingBox( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [2, 0, 1, 1], # x1 > x2, y1 < y2 + [0, 2, 1, 1], # x1 < x2, y1 > y2 + [2, 2, 1, 1], # x1 > x2, y1 > y2 + ], + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=spatial_size, + ), + bounding_box_degenerate_xywh=datapoints.BoundingBox( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no height + [0, 0, 1, 0], # no width + [0, 0, 1, -1], # negative height + [0, 0, -1, 1], # negative width + [0, 0, -1, -1], # negative height and width + ], + format=datapoints.BoundingBoxFormat.XYWH, + spatial_size=spatial_size, + ), + bounding_box_degenerate_cxcywh=datapoints.BoundingBox( + [ + [0, 0, 0, 0], # no height or width + [0, 0, 0, 1], # no 
height + [0, 0, 1, 0], # no width + [0, 0, 1, -1], # negative height + [0, 0, -1, 1], # negative width + [0, 0, -1, -1], # negative height and width + ], + format=datapoints.BoundingBoxFormat.CXCYWH, + spatial_size=spatial_size, + ), + detection_mask=make_detection_mask(size=spatial_size), + segmentation_mask=make_segmentation_mask(size=spatial_size), + int=0, + float=0.0, + bool=True, + none=None, + str="str", + path=pathlib.Path.cwd(), + object=object(), + tensor=torch.empty(5), + array=np.empty(5), + ) + if adapter is not None: + input = adapter(transform, input, device) + + if container_type in {tuple, list}: + input = container_type(input.values()) + + input_flat, input_spec = tree_flatten(input) + input_flat = [item.to(device) if isinstance(item, torch.Tensor) else item for item in input_flat] + input = tree_unflatten(input_flat, input_spec) + + torch.manual_seed(0) + output = transform(input) + output_flat, output_spec = tree_flatten(output) + + assert output_spec == input_spec + + for output_item, input_item, should_be_transformed in zip( + output_flat, input_flat, transforms.Transform()._needs_transform_list(input_flat) + ): + if should_be_transformed: + assert type(output_item) is type(input_item) + else: + assert output_item is input_item + + @parametrize( + [ + ( + transform, + itertools.chain.from_iterable( + fn( + color_spaces=[ + "GRAY", + "RGB", + ], + dtypes=[torch.uint8], + extra_dims=[(), (4,)], + **(dict(num_frames=["random"]) if fn is make_videos else dict()), + ) + for fn in [ + make_images, + make_vanilla_tensor_images, + make_pil_images, + make_videos, + ] + ), + ) + for transform in ( + transforms.RandAugment(), + transforms.TrivialAugmentWide(), + transforms.AutoAugment(), + transforms.AugMix(), + ) + ] + ) + def test_auto_augment(self, transform, input): + transform(input) + + @parametrize( + [ + ( + transforms.Normalize(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0]), + itertools.chain.from_iterable( + fn(color_spaces=["RGB"], dtypes=[torch.float32]) + for fn in [ + make_images, + make_vanilla_tensor_images, + make_videos, + ] + ), + ), + ] + ) + def test_normalize(self, transform, input): + transform(input) + + @parametrize( + [ + ( + transforms.RandomResizedCrop([16, 16], antialias=True), + itertools.chain( + make_images(extra_dims=[(4,)]), + make_vanilla_tensor_images(), + make_pil_images(), + make_videos(extra_dims=[()]), + ), + ) + ] + ) + def test_random_resized_crop(self, transform, input): + transform(input) + + +@pytest.mark.parametrize( + "flat_inputs", + itertools.permutations( + [ + next(make_vanilla_tensor_images()), + next(make_vanilla_tensor_images()), + next(make_pil_images()), + make_image(), + next(make_videos()), + ], + 3, + ), +) +def test_simple_tensor_heuristic(flat_inputs): + def split_on_simple_tensor(to_split): + # This takes a sequence that is structurally aligned with `flat_inputs` and splits its items into three parts: + # 1. The first simple tensor. If none is present, this will be `None` + # 2. A list of the remaining simple tensors + # 3. A list of all other items + simple_tensors = [] + others = [] + # Splitting always happens on the original `flat_inputs` to avoid any erroneous type changes by the transform to + # affect the splitting. 
+ for item, inpt in zip(to_split, flat_inputs): + (simple_tensors if is_simple_tensor(inpt) else others).append(item) + return simple_tensors[0] if simple_tensors else None, simple_tensors[1:], others + + class CopyCloneTransform(transforms.Transform): + def _transform(self, inpt, params): + return inpt.clone() if isinstance(inpt, torch.Tensor) else inpt.copy() + + @staticmethod + def was_applied(output, inpt): + identity = output is inpt + if identity: + return False + + # Make sure nothing fishy is going on + assert_equal(output, inpt) + return True + + first_simple_tensor_input, other_simple_tensor_inputs, other_inputs = split_on_simple_tensor(flat_inputs) + + transform = CopyCloneTransform() + transformed_sample = transform(flat_inputs) + + first_simple_tensor_output, other_simple_tensor_outputs, other_outputs = split_on_simple_tensor(transformed_sample) + + if first_simple_tensor_input is not None: + if other_inputs: + assert not transform.was_applied(first_simple_tensor_output, first_simple_tensor_input) + else: + assert transform.was_applied(first_simple_tensor_output, first_simple_tensor_input) + + for output, inpt in zip(other_simple_tensor_outputs, other_simple_tensor_inputs): + assert not transform.was_applied(output, inpt) + + for input, output in zip(other_inputs, other_outputs): + assert transform.was_applied(output, input) + + +@pytest.mark.parametrize("p", [0.0, 1.0]) +class TestRandomHorizontalFlip: + def input_expected_image_tensor(self, p, dtype=torch.float32): + input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype) + expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype) + + return input, expected if p == 1 else input + + def test_simple_tensor(self, p): + input, expected = self.input_expected_image_tensor(p) + transform = transforms.RandomHorizontalFlip(p=p) + + actual = transform(input) + + assert_equal(expected, actual) + + def test_pil_image(self, p): + input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8) + transform = transforms.RandomHorizontalFlip(p=p) + + actual = transform(to_pil_image(input)) + + assert_equal(expected, pil_to_tensor(actual)) + + def test_datapoints_image(self, p): + input, expected = self.input_expected_image_tensor(p) + transform = transforms.RandomHorizontalFlip(p=p) + + actual = transform(datapoints.Image(input)) + + assert_equal(datapoints.Image(expected), actual) + + def test_datapoints_mask(self, p): + input, expected = self.input_expected_image_tensor(p) + transform = transforms.RandomHorizontalFlip(p=p) + + actual = transform(datapoints.Mask(input)) + + assert_equal(datapoints.Mask(expected), actual) + + def test_datapoints_bounding_box(self, p): + input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)) + transform = transforms.RandomHorizontalFlip(p=p) + + actual = transform(input) + + expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input + expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor) + assert_equal(expected, actual) + assert actual.format == expected.format + assert actual.spatial_size == expected.spatial_size + + +@pytest.mark.parametrize("p", [0.0, 1.0]) +class TestRandomVerticalFlip: + def input_expected_image_tensor(self, p, dtype=torch.float32): + input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype) + expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype) + + return input, expected if p == 1 else input + + def 
test_simple_tensor(self, p): + input, expected = self.input_expected_image_tensor(p) + transform = transforms.RandomVerticalFlip(p=p) + + actual = transform(input) + + assert_equal(expected, actual) + + def test_pil_image(self, p): + input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8) + transform = transforms.RandomVerticalFlip(p=p) + + actual = transform(to_pil_image(input)) + + assert_equal(expected, pil_to_tensor(actual)) + + def test_datapoints_image(self, p): + input, expected = self.input_expected_image_tensor(p) + transform = transforms.RandomVerticalFlip(p=p) + + actual = transform(datapoints.Image(input)) + + assert_equal(datapoints.Image(expected), actual) + + def test_datapoints_mask(self, p): + input, expected = self.input_expected_image_tensor(p) + transform = transforms.RandomVerticalFlip(p=p) + + actual = transform(datapoints.Mask(input)) + + assert_equal(datapoints.Mask(expected), actual) + + def test_datapoints_bounding_box(self, p): + input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)) + transform = transforms.RandomVerticalFlip(p=p) + + actual = transform(input) + + expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input + expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor) + assert_equal(expected, actual) + assert actual.format == expected.format + assert actual.spatial_size == expected.spatial_size + + +class TestPad: + def test_assertions(self): + with pytest.raises(TypeError, match="Got inappropriate padding arg"): + transforms.Pad("abc") + + with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): + transforms.Pad([-0.7, 0, 0.7]) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.Pad(12, fill="abc") + + with pytest.raises(ValueError, match="Padding mode should be either"): + transforms.Pad(12, padding_mode="abc") + + @pytest.mark.parametrize("padding", [1, (1, 2), [1, 2, 3, 4]]) + @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) + @pytest.mark.parametrize("padding_mode", ["constant", "edge"]) + def test__transform(self, padding, fill, padding_mode, mocker): + transform = transforms.Pad(padding, fill=fill, padding_mode=padding_mode) + + fn = mocker.patch("torchvision.transforms.v2.functional.pad") + inpt = mocker.MagicMock(spec=datapoints.Image) + _ = transform(inpt) + + fill = transforms._utils._convert_fill_arg(fill) + if isinstance(padding, tuple): + padding = list(padding) + fn.assert_called_once_with(inpt, padding=padding, fill=fill, padding_mode=padding_mode) + + @pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}]) + def test__transform_image_mask(self, fill, mocker): + transform = transforms.Pad(1, fill=fill, padding_mode="constant") + + fn = mocker.patch("torchvision.transforms.v2.functional.pad") + image = datapoints.Image(torch.rand(3, 32, 32)) + mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32))) + inpt = [image, mask] + _ = transform(inpt) + + if isinstance(fill, int): + fill = transforms._utils._convert_fill_arg(fill) + calls = [ + mocker.call(image, padding=1, fill=fill, padding_mode="constant"), + mocker.call(mask, padding=1, fill=fill, padding_mode="constant"), + ] + else: + fill_img = transforms._utils._convert_fill_arg(fill[type(image)]) + fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)]) + calls = [ + mocker.call(image, padding=1, fill=fill_img, padding_mode="constant"), + mocker.call(mask, padding=1, 
fill=fill_mask, padding_mode="constant"), + ] + fn.assert_has_calls(calls) + + +class TestRandomZoomOut: + def test_assertions(self): + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomZoomOut(fill="abc") + + with pytest.raises(TypeError, match="should be a sequence of length"): + transforms.RandomZoomOut(0, side_range=0) + + with pytest.raises(ValueError, match="Invalid canvas side range"): + transforms.RandomZoomOut(0, side_range=[4.0, 1.0]) + + @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) + @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]]) + def test__get_params(self, fill, side_range, mocker): + transform = transforms.RandomZoomOut(fill=fill, side_range=side_range) + + image = mocker.MagicMock(spec=datapoints.Image) + h, w = image.spatial_size = (24, 32) + + params = transform._get_params([image]) + + assert len(params["padding"]) == 4 + assert 0 <= params["padding"][0] <= (side_range[1] - 1) * w + assert 0 <= params["padding"][1] <= (side_range[1] - 1) * h + assert 0 <= params["padding"][2] <= (side_range[1] - 1) * w + assert 0 <= params["padding"][3] <= (side_range[1] - 1) * h + + @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) + @pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]]) + def test__transform(self, fill, side_range, mocker): + inpt = mocker.MagicMock(spec=datapoints.Image) + inpt.num_channels = 3 + inpt.spatial_size = (24, 32) + + transform = transforms.RandomZoomOut(fill=fill, side_range=side_range, p=1) + + fn = mocker.patch("torchvision.transforms.v2.functional.pad") + # vfdev-5, Feature Request: let's store params as Transform attribute + # This could be also helpful for users + # Otherwise, we can mock transform._get_params + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + torch.rand(1) # random apply changes random state + params = transform._get_params([inpt]) + + fill = transforms._utils._convert_fill_arg(fill) + fn.assert_called_once_with(inpt, **params, fill=fill) + + @pytest.mark.parametrize("fill", [12, {datapoints.Image: 12, datapoints.Mask: 34}]) + def test__transform_image_mask(self, fill, mocker): + transform = transforms.RandomZoomOut(fill=fill, p=1.0) + + fn = mocker.patch("torchvision.transforms.v2.functional.pad") + image = datapoints.Image(torch.rand(3, 32, 32)) + mask = datapoints.Mask(torch.randint(0, 5, size=(32, 32))) + inpt = [image, mask] + + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + torch.rand(1) # random apply changes random state + params = transform._get_params(inpt) + + if isinstance(fill, int): + fill = transforms._utils._convert_fill_arg(fill) + calls = [ + mocker.call(image, **params, fill=fill), + mocker.call(mask, **params, fill=fill), + ] + else: + fill_img = transforms._utils._convert_fill_arg(fill[type(image)]) + fill_mask = transforms._utils._convert_fill_arg(fill[type(mask)]) + calls = [ + mocker.call(image, **params, fill=fill_img), + mocker.call(mask, **params, fill=fill_mask), + ] + fn.assert_has_calls(calls) + + +class TestRandomRotation: + def test_assertions(self): + with pytest.raises(ValueError, match="is a single number, it must be positive"): + transforms.RandomRotation(-0.7) + + for d in [[-0.7], [-0.7, 0, 0.7]]: + with pytest.raises(ValueError, match="degrees should be a sequence of length 2"): + transforms.RandomRotation(d) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomRotation(12, fill="abc") + + with pytest.raises(TypeError, 
match="center should be a sequence of length"): + transforms.RandomRotation(12, center=12) + + with pytest.raises(ValueError, match="center should be a sequence of length"): + transforms.RandomRotation(12, center=[1, 2, 3]) + + def test__get_params(self): + angle_bound = 34 + transform = transforms.RandomRotation(angle_bound) + + params = transform._get_params(None) + assert -angle_bound <= params["angle"] <= angle_bound + + angle_bounds = [12, 34] + transform = transforms.RandomRotation(angle_bounds) + + params = transform._get_params(None) + assert angle_bounds[0] <= params["angle"] <= angle_bounds[1] + + @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)]) + @pytest.mark.parametrize("expand", [False, True]) + @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) + @pytest.mark.parametrize("center", [None, [2.0, 3.0]]) + def test__transform(self, degrees, expand, fill, center, mocker): + interpolation = InterpolationMode.BILINEAR + transform = transforms.RandomRotation( + degrees, interpolation=interpolation, expand=expand, fill=fill, center=center + ) + + if isinstance(degrees, (tuple, list)): + assert transform.degrees == [float(degrees[0]), float(degrees[1])] + else: + assert transform.degrees == [float(-degrees), float(degrees)] + + fn = mocker.patch("torchvision.transforms.v2.functional.rotate") + inpt = mocker.MagicMock(spec=datapoints.Image) + # vfdev-5, Feature Request: let's store params as Transform attribute + # This could be also helpful for users + # Otherwise, we can mock transform._get_params + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + params = transform._get_params(inpt) + + fill = transforms._utils._convert_fill_arg(fill) + fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center) + + @pytest.mark.parametrize("angle", [34, -87]) + @pytest.mark.parametrize("expand", [False, True]) + def test_boundingbox_spatial_size(self, angle, expand): + # Specific test for BoundingBox.rotate + bbox = datapoints.BoundingBox( + torch.tensor([1, 2, 3, 4]), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32) + ) + img = datapoints.Image(torch.rand(1, 3, 32, 32)) + + out_img = img.rotate(angle, expand=expand) + out_bbox = bbox.rotate(angle, expand=expand) + + assert out_img.spatial_size == out_bbox.spatial_size + + +class TestRandomAffine: + def test_assertions(self): + with pytest.raises(ValueError, match="is a single number, it must be positive"): + transforms.RandomAffine(-0.7) + + for d in [[-0.7], [-0.7, 0, 0.7]]: + with pytest.raises(ValueError, match="degrees should be a sequence of length 2"): + transforms.RandomAffine(d) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomAffine(12, fill="abc") + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomAffine(12, fill="abc") + + for kwargs in [ + {"center": 12}, + {"translate": 12}, + {"scale": 12}, + ]: + with pytest.raises(TypeError, match="should be a sequence of length"): + transforms.RandomAffine(12, **kwargs) + + for kwargs in [{"center": [1, 2, 3]}, {"translate": [1, 2, 3]}, {"scale": [1, 2, 3]}]: + with pytest.raises(ValueError, match="should be a sequence of length"): + transforms.RandomAffine(12, **kwargs) + + with pytest.raises(ValueError, match="translation values should be between 0 and 1"): + transforms.RandomAffine(12, translate=[-1.0, 2.0]) + + with pytest.raises(ValueError, match="scale values should be positive"): + 
transforms.RandomAffine(12, scale=[-1.0, 2.0]) + + with pytest.raises(ValueError, match="is a single number, it must be positive"): + transforms.RandomAffine(12, shear=-10) + + for s in [[-0.7], [-0.7, 0, 0.7]]: + with pytest.raises(ValueError, match="shear should be a sequence of length 2"): + transforms.RandomAffine(12, shear=s) + + @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)]) + @pytest.mark.parametrize("translate", [None, [0.1, 0.2]]) + @pytest.mark.parametrize("scale", [None, [0.7, 1.2]]) + @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]]) + def test__get_params(self, degrees, translate, scale, shear, mocker): + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + h, w = image.spatial_size + + transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear) + params = transform._get_params([image]) + + if not isinstance(degrees, (list, tuple)): + assert -degrees <= params["angle"] <= degrees + else: + assert degrees[0] <= params["angle"] <= degrees[1] + + if translate is not None: + w_max = int(round(translate[0] * w)) + h_max = int(round(translate[1] * h)) + assert -w_max <= params["translate"][0] <= w_max + assert -h_max <= params["translate"][1] <= h_max + else: + assert params["translate"] == (0, 0) + + if scale is not None: + assert scale[0] <= params["scale"] <= scale[1] + else: + assert params["scale"] == 1.0 + + if shear is not None: + if isinstance(shear, float): + assert -shear <= params["shear"][0] <= shear + assert params["shear"][1] == 0.0 + elif len(shear) == 2: + assert shear[0] <= params["shear"][0] <= shear[1] + assert params["shear"][1] == 0.0 + else: + assert shear[0] <= params["shear"][0] <= shear[1] + assert shear[2] <= params["shear"][1] <= shear[3] + else: + assert params["shear"] == (0, 0) + + @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)]) + @pytest.mark.parametrize("translate", [None, [0.1, 0.2]]) + @pytest.mark.parametrize("scale", [None, [0.7, 1.2]]) + @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]]) + @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)]) + @pytest.mark.parametrize("center", [None, [2.0, 3.0]]) + def test__transform(self, degrees, translate, scale, shear, fill, center, mocker): + interpolation = InterpolationMode.BILINEAR + transform = transforms.RandomAffine( + degrees, + translate=translate, + scale=scale, + shear=shear, + interpolation=interpolation, + fill=fill, + center=center, + ) + + if isinstance(degrees, (tuple, list)): + assert transform.degrees == [float(degrees[0]), float(degrees[1])] + else: + assert transform.degrees == [float(-degrees), float(degrees)] + + fn = mocker.patch("torchvision.transforms.v2.functional.affine") + inpt = mocker.MagicMock(spec=datapoints.Image) + inpt.num_channels = 3 + inpt.spatial_size = (24, 32) + + # vfdev-5, Feature Request: let's store params as Transform attribute + # This could be also helpful for users + # Otherwise, we can mock transform._get_params + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + params = transform._get_params([inpt]) + + fill = transforms._utils._convert_fill_arg(fill) + fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center) + + +class TestRandomCrop: + def test_assertions(self): + with pytest.raises(ValueError, match="Please provide only two dimensions"): + transforms.RandomCrop([10, 12, 14]) + + with pytest.raises(TypeError, 
match="Got inappropriate padding arg"): + transforms.RandomCrop([10, 12], padding="abc") + + with pytest.raises(ValueError, match="Padding must be an int or a 1, 2, or 4"): + transforms.RandomCrop([10, 12], padding=[-0.7, 0, 0.7]) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomCrop([10, 12], padding=1, fill="abc") + + with pytest.raises(ValueError, match="Padding mode should be either"): + transforms.RandomCrop([10, 12], padding=1, padding_mode="abc") + + @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]]) + @pytest.mark.parametrize("size, pad_if_needed", [((10, 10), False), ((50, 25), True)]) + def test__get_params(self, padding, pad_if_needed, size, mocker): + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + h, w = image.spatial_size + + transform = transforms.RandomCrop(size, padding=padding, pad_if_needed=pad_if_needed) + params = transform._get_params([image]) + + if padding is not None: + if isinstance(padding, int): + pad_top = pad_bottom = pad_left = pad_right = padding + elif isinstance(padding, list) and len(padding) == 2: + pad_left = pad_right = padding[0] + pad_top = pad_bottom = padding[1] + elif isinstance(padding, list) and len(padding) == 4: + pad_left, pad_top, pad_right, pad_bottom = padding + + h += pad_top + pad_bottom + w += pad_left + pad_right + else: + pad_left = pad_right = pad_top = pad_bottom = 0 + + if pad_if_needed: + if w < size[1]: + diff = size[1] - w + pad_left += diff + pad_right += diff + w += 2 * diff + if h < size[0]: + diff = size[0] - h + pad_top += diff + pad_bottom += diff + h += 2 * diff + + padding = [pad_left, pad_top, pad_right, pad_bottom] + + assert 0 <= params["top"] <= h - size[0] + 1 + assert 0 <= params["left"] <= w - size[1] + 1 + assert params["height"] == size[0] + assert params["width"] == size[1] + assert params["needs_pad"] is any(padding) + assert params["padding"] == padding + + @pytest.mark.parametrize("padding", [None, 1, [2, 3], [1, 2, 3, 4]]) + @pytest.mark.parametrize("pad_if_needed", [False, True]) + @pytest.mark.parametrize("fill", [False, True]) + @pytest.mark.parametrize("padding_mode", ["constant", "edge"]) + def test__transform(self, padding, pad_if_needed, fill, padding_mode, mocker): + output_size = [10, 12] + transform = transforms.RandomCrop( + output_size, padding=padding, pad_if_needed=pad_if_needed, fill=fill, padding_mode=padding_mode + ) + + inpt = mocker.MagicMock(spec=datapoints.Image) + inpt.num_channels = 3 + inpt.spatial_size = (32, 32) + + expected = mocker.MagicMock(spec=datapoints.Image) + expected.num_channels = 3 + if isinstance(padding, int): + expected.spatial_size = (inpt.spatial_size[0] + padding, inpt.spatial_size[1] + padding) + elif isinstance(padding, list): + expected.spatial_size = ( + inpt.spatial_size[0] + sum(padding[0::2]), + inpt.spatial_size[1] + sum(padding[1::2]), + ) + else: + expected.spatial_size = inpt.spatial_size + _ = mocker.patch("torchvision.transforms.v2.functional.pad", return_value=expected) + fn_crop = mocker.patch("torchvision.transforms.v2.functional.crop") + + # vfdev-5, Feature Request: let's store params as Transform attribute + # This could be also helpful for users + # Otherwise, we can mock transform._get_params + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + params = transform._get_params([inpt]) + if padding is None and not pad_if_needed: + fn_crop.assert_called_once_with( + inpt, top=params["top"], left=params["left"], 
height=output_size[0], width=output_size[1] + ) + elif not pad_if_needed: + fn_crop.assert_called_once_with( + expected, top=params["top"], left=params["left"], height=output_size[0], width=output_size[1] + ) + elif padding is None: + # vfdev-5: I do not know how to mock and test this case + pass + else: + # vfdev-5: I do not know how to mock and test this case + pass + + +class TestGaussianBlur: + def test_assertions(self): + with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"): + transforms.GaussianBlur([10, 12, 14]) + + with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"): + transforms.GaussianBlur(4) + + with pytest.raises( + TypeError, match="sigma should be a single int or float or a list/tuple with length 2 floats." + ): + transforms.GaussianBlur(3, sigma=[1, 2, 3]) + + with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"): + transforms.GaussianBlur(3, sigma=-1.0) + + with pytest.raises(ValueError, match="sigma values should be positive and of the form"): + transforms.GaussianBlur(3, sigma=[2.0, 1.0]) + + @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]]) + def test__get_params(self, sigma): + transform = transforms.GaussianBlur(3, sigma=sigma) + params = transform._get_params([]) + + if isinstance(sigma, float): + assert params["sigma"][0] == params["sigma"][1] == 10 + else: + assert sigma[0] <= params["sigma"][0] <= sigma[1] + assert sigma[0] <= params["sigma"][1] <= sigma[1] + + @pytest.mark.parametrize("kernel_size", [3, [3, 5], (5, 3)]) + @pytest.mark.parametrize("sigma", [2.0, [2.0, 3.0]]) + def test__transform(self, kernel_size, sigma, mocker): + transform = transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma) + + if isinstance(kernel_size, (tuple, list)): + assert transform.kernel_size == kernel_size + else: + kernel_size = (kernel_size, kernel_size) + assert transform.kernel_size == kernel_size + + if isinstance(sigma, (tuple, list)): + assert transform.sigma == sigma + else: + assert transform.sigma == [sigma, sigma] + + fn = mocker.patch("torchvision.transforms.v2.functional.gaussian_blur") + inpt = mocker.MagicMock(spec=datapoints.Image) + inpt.num_channels = 3 + inpt.spatial_size = (24, 32) + + # vfdev-5, Feature Request: let's store params as Transform attribute + # This could be also helpful for users + # Otherwise, we can mock transform._get_params + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + params = transform._get_params([inpt]) + + fn.assert_called_once_with(inpt, kernel_size, **params) + + +class TestRandomColorOp: + @pytest.mark.parametrize("p", [0.0, 1.0]) + @pytest.mark.parametrize( + "transform_cls, func_op_name, kwargs", + [ + (transforms.RandomEqualize, "equalize", {}), + (transforms.RandomInvert, "invert", {}), + (transforms.RandomAutocontrast, "autocontrast", {}), + (transforms.RandomPosterize, "posterize", {"bits": 4}), + (transforms.RandomSolarize, "solarize", {"threshold": 0.5}), + (transforms.RandomAdjustSharpness, "adjust_sharpness", {"sharpness_factor": 0.5}), + ], + ) + def test__transform(self, p, transform_cls, func_op_name, kwargs, mocker): + transform = transform_cls(p=p, **kwargs) + + fn = mocker.patch(f"torchvision.transforms.v2.functional.{func_op_name}") + inpt = mocker.MagicMock(spec=datapoints.Image) + _ = transform(inpt) + if p > 0.0: + fn.assert_called_once_with(inpt, **kwargs) + else: + assert fn.call_count == 0 + + +class TestRandomPerspective: + def test_assertions(self): + with 
pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"): + transforms.RandomPerspective(distortion_scale=-1.0) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomPerspective(0.5, fill="abc") + + def test__get_params(self, mocker): + dscale = 0.5 + transform = transforms.RandomPerspective(dscale) + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + + params = transform._get_params([image]) + + h, w = image.spatial_size + assert "coefficients" in params + assert len(params["coefficients"]) == 8 + + @pytest.mark.parametrize("distortion_scale", [0.1, 0.7]) + def test__transform(self, distortion_scale, mocker): + interpolation = InterpolationMode.BILINEAR + fill = 12 + transform = transforms.RandomPerspective(distortion_scale, fill=fill, interpolation=interpolation) + + fn = mocker.patch("torchvision.transforms.v2.functional.perspective") + inpt = mocker.MagicMock(spec=datapoints.Image) + inpt.num_channels = 3 + inpt.spatial_size = (24, 32) + # vfdev-5, Feature Request: let's store params as Transform attribute + # This could be also helpful for users + # Otherwise, we can mock transform._get_params + torch.manual_seed(12) + _ = transform(inpt) + torch.manual_seed(12) + torch.rand(1) # random apply changes random state + params = transform._get_params([inpt]) + + fill = transforms._utils._convert_fill_arg(fill) + fn.assert_called_once_with(inpt, None, None, **params, fill=fill, interpolation=interpolation) + + +class TestElasticTransform: + def test_assertions(self): + + with pytest.raises(TypeError, match="alpha should be float or a sequence of floats"): + transforms.ElasticTransform({}) + + with pytest.raises(ValueError, match="alpha is a sequence its length should be one of 2"): + transforms.ElasticTransform([1.0, 2.0, 3.0]) + + with pytest.raises(ValueError, match="alpha should be a sequence of floats"): + transforms.ElasticTransform([1, 2]) + + with pytest.raises(TypeError, match="sigma should be float or a sequence of floats"): + transforms.ElasticTransform(1.0, {}) + + with pytest.raises(ValueError, match="sigma is a sequence its length should be one of 2"): + transforms.ElasticTransform(1.0, [1.0, 2.0, 3.0]) + + with pytest.raises(ValueError, match="sigma should be a sequence of floats"): + transforms.ElasticTransform(1.0, [1, 2]) + + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.ElasticTransform(1.0, 2.0, fill="abc") + + def test__get_params(self, mocker): + alpha = 2.0 + sigma = 3.0 + transform = transforms.ElasticTransform(alpha, sigma) + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + + params = transform._get_params([image]) + + h, w = image.spatial_size + displacement = params["displacement"] + assert displacement.shape == (1, h, w, 2) + assert (-alpha / w <= displacement[0, ..., 0]).all() and (displacement[0, ..., 0] <= alpha / w).all() + assert (-alpha / h <= displacement[0, ..., 1]).all() and (displacement[0, ..., 1] <= alpha / h).all() + + @pytest.mark.parametrize("alpha", [5.0, [5.0, 10.0]]) + @pytest.mark.parametrize("sigma", [2.0, [2.0, 5.0]]) + def test__transform(self, alpha, sigma, mocker): + interpolation = InterpolationMode.BILINEAR + fill = 12 + transform = transforms.ElasticTransform(alpha, sigma=sigma, fill=fill, interpolation=interpolation) + + if isinstance(alpha, float): + assert transform.alpha == [alpha, alpha] + else: + assert 
transform.alpha == alpha + + if isinstance(sigma, float): + assert transform.sigma == [sigma, sigma] + else: + assert transform.sigma == sigma + + fn = mocker.patch("torchvision.transforms.v2.functional.elastic") + inpt = mocker.MagicMock(spec=datapoints.Image) + inpt.num_channels = 3 + inpt.spatial_size = (24, 32) + + # Let's mock transform._get_params to control the output: + transform._get_params = mocker.MagicMock() + _ = transform(inpt) + params = transform._get_params([inpt]) + fill = transforms._utils._convert_fill_arg(fill) + fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation) + + +class TestRandomErasing: + def test_assertions(self, mocker): + with pytest.raises(TypeError, match="Argument value should be either a number or str or a sequence"): + transforms.RandomErasing(value={}) + + with pytest.raises(ValueError, match="If value is str, it should be 'random'"): + transforms.RandomErasing(value="abc") + + with pytest.raises(TypeError, match="Scale should be a sequence"): + transforms.RandomErasing(scale=123) + + with pytest.raises(TypeError, match="Ratio should be a sequence"): + transforms.RandomErasing(ratio=123) + + with pytest.raises(ValueError, match="Scale should be between 0 and 1"): + transforms.RandomErasing(scale=[-1, 2]) + + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + + transform = transforms.RandomErasing(value=[1, 2, 3, 4]) + + with pytest.raises(ValueError, match="If value is a sequence, it should have either a single value"): + transform._get_params([image]) + + @pytest.mark.parametrize("value", [5.0, [1, 2, 3], "random"]) + def test__get_params(self, value, mocker): + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + + transform = transforms.RandomErasing(value=value) + params = transform._get_params([image]) + + v = params["v"] + h, w = params["h"], params["w"] + i, j = params["i"], params["j"] + assert isinstance(v, torch.Tensor) + if value == "random": + assert v.shape == (image.num_channels, h, w) + elif isinstance(value, (int, float)): + assert v.shape == (1, 1, 1) + elif isinstance(value, (list, tuple)): + assert v.shape == (image.num_channels, 1, 1) + + assert 0 <= i <= image.spatial_size[0] - h + assert 0 <= j <= image.spatial_size[1] - w + + @pytest.mark.parametrize("p", [0, 1]) + def test__transform(self, mocker, p): + transform = transforms.RandomErasing(p=p) + transform._transformed_types = (mocker.MagicMock,) + + i_sentinel = mocker.MagicMock() + j_sentinel = mocker.MagicMock() + h_sentinel = mocker.MagicMock() + w_sentinel = mocker.MagicMock() + v_sentinel = mocker.MagicMock() + mocker.patch( + "torchvision.transforms.v2._augment.RandomErasing._get_params", + return_value=dict(i=i_sentinel, j=j_sentinel, h=h_sentinel, w=w_sentinel, v=v_sentinel), + ) + + inpt_sentinel = mocker.MagicMock() + + mock = mocker.patch("torchvision.transforms.v2._augment.F.erase") + output = transform(inpt_sentinel) + + if p: + mock.assert_called_once_with( + inpt_sentinel, + i=i_sentinel, + j=j_sentinel, + h=h_sentinel, + w=w_sentinel, + v=v_sentinel, + inplace=transform.inplace, + ) + else: + mock.assert_not_called() + assert output is inpt_sentinel + + +class TestTransform: + @pytest.mark.parametrize( + "inpt_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], + ) + def test_check_transformed_types(self, inpt_type, mocker): + # This test ensures that we correctly handle 
which types to transform and which to bypass + t = transforms.Transform() + inpt = mocker.MagicMock(spec=inpt_type) + + if inpt_type in (np.ndarray, str, int): + output = t(inpt) + assert output is inpt + else: + with pytest.raises(NotImplementedError): + t(inpt) + + +class TestToImageTensor: + @pytest.mark.parametrize( + "inpt_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], + ) + def test__transform(self, inpt_type, mocker): + fn = mocker.patch( + "torchvision.transforms.v2.functional.to_image_tensor", + return_value=torch.rand(1, 3, 8, 8), + ) + + inpt = mocker.MagicMock(spec=inpt_type) + transform = transforms.ToImageTensor() + transform(inpt) + if inpt_type in (datapoints.BoundingBox, datapoints.Image, str, int): + assert fn.call_count == 0 + else: + fn.assert_called_once_with(inpt) + + +class TestToImagePIL: + @pytest.mark.parametrize( + "inpt_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], + ) + def test__transform(self, inpt_type, mocker): + fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil") + + inpt = mocker.MagicMock(spec=inpt_type) + transform = transforms.ToImagePIL() + transform(inpt) + if inpt_type in (datapoints.BoundingBox, PIL.Image.Image, str, int): + assert fn.call_count == 0 + else: + fn.assert_called_once_with(inpt, mode=transform.mode) + + +class TestToPILImage: + @pytest.mark.parametrize( + "inpt_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], + ) + def test__transform(self, inpt_type, mocker): + fn = mocker.patch("torchvision.transforms.v2.functional.to_image_pil") + + inpt = mocker.MagicMock(spec=inpt_type) + transform = transforms.ToPILImage() + transform(inpt) + if inpt_type in (PIL.Image.Image, datapoints.BoundingBox, str, int): + assert fn.call_count == 0 + else: + fn.assert_called_once_with(inpt, mode=transform.mode) + + +class TestToTensor: + @pytest.mark.parametrize( + "inpt_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, np.ndarray, datapoints.BoundingBox, str, int], + ) + def test__transform(self, inpt_type, mocker): + fn = mocker.patch("torchvision.transforms.functional.to_tensor") + + inpt = mocker.MagicMock(spec=inpt_type) + with pytest.warns(UserWarning, match="deprecated and will be removed"): + transform = transforms.ToTensor() + transform(inpt) + if inpt_type in (datapoints.Image, torch.Tensor, datapoints.BoundingBox, str, int): + assert fn.call_count == 0 + else: + fn.assert_called_once_with(inpt) + + +class TestContainers: + @pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder]) + def test_assertions(self, transform_cls): + with pytest.raises(TypeError, match="Argument transforms should be a sequence of callables"): + transform_cls(transforms.RandomCrop(28)) + + @pytest.mark.parametrize("transform_cls", [transforms.Compose, transforms.RandomChoice, transforms.RandomOrder]) + @pytest.mark.parametrize( + "trfms", + [ + [transforms.Pad(2), transforms.RandomCrop(28)], + [lambda x: 2.0 * x, transforms.Pad(2), transforms.RandomCrop(28)], + [transforms.Pad(2), lambda x: 2.0 * x, transforms.RandomCrop(28)], + ], + ) + def test_ctor(self, transform_cls, trfms): + c = transform_cls(trfms) + inpt = torch.rand(1, 3, 32, 32) + output = c(inpt) + assert isinstance(output, torch.Tensor) + assert output.ndim == 4 + + +class TestRandomChoice: + def test_assertions(self): + with pytest.warns(UserWarning, 
match="Argument p is deprecated and will be removed"): + transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], p=[1, 2]) + + with pytest.raises(ValueError, match="The number of probabilities doesn't match the number of transforms"): + transforms.RandomChoice([transforms.Pad(2), transforms.RandomCrop(28)], probabilities=[1]) + + +class TestRandomIoUCrop: + @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]]) + def test__get_params(self, device, options, mocker): + image = mocker.MagicMock(spec=datapoints.Image) + image.num_channels = 3 + image.spatial_size = (24, 32) + bboxes = datapoints.BoundingBox( + torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]), + format="XYXY", + spatial_size=image.spatial_size, + device=device, + ) + sample = [image, bboxes] + + transform = transforms.RandomIoUCrop(sampler_options=options) + + n_samples = 5 + for _ in range(n_samples): + + params = transform._get_params(sample) + + if options == [2.0]: + assert len(params) == 0 + return + + assert len(params["is_within_crop_area"]) > 0 + assert params["is_within_crop_area"].dtype == torch.bool + + orig_h = image.spatial_size[0] + orig_w = image.spatial_size[1] + assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h) + assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w) + + left, top = params["left"], params["top"] + new_h, new_w = params["height"], params["width"] + ious = box_iou( + bboxes, + torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device), + ) + assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}" + + def test__transform_empty_params(self, mocker): + transform = transforms.RandomIoUCrop(sampler_options=[2.0]) + image = datapoints.Image(torch.rand(1, 3, 4, 4)) + bboxes = datapoints.BoundingBox(torch.tensor([[1, 1, 2, 2]]), format="XYXY", spatial_size=(4, 4)) + label = torch.tensor([1]) + sample = [image, bboxes, label] + # Let's mock transform._get_params to control the output: + transform._get_params = mocker.MagicMock(return_value={}) + output = transform(sample) + torch.testing.assert_close(output, sample) + + def test_forward_assertion(self): + transform = transforms.RandomIoUCrop() + with pytest.raises( + TypeError, + match="requires input sample to contain tensor or PIL images and bounding boxes", + ): + transform(torch.tensor(0)) + + def test__transform(self, mocker): + transform = transforms.RandomIoUCrop() + + image = datapoints.Image(torch.rand(3, 32, 24)) + bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,)) + masks = make_detection_mask((32, 24), num_objects=6) + + sample = [image, bboxes, masks] + + fn = mocker.patch("torchvision.transforms.v2.functional.crop", side_effect=lambda x, **params: x) + is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool) + + params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area) + transform._get_params = mocker.MagicMock(return_value=params) + output = transform(sample) + + assert fn.call_count == 3 + + expected_calls = [ + mocker.call(image, top=params["top"], left=params["left"], height=params["height"], width=params["width"]), + mocker.call(bboxes, top=params["top"], left=params["left"], height=params["height"], width=params["width"]), + mocker.call(masks, top=params["top"], left=params["left"], 
height=params["height"], width=params["width"]), + ] + + fn.assert_has_calls(expected_calls) + + # check number of bboxes vs number of labels: + output_bboxes = output[1] + assert isinstance(output_bboxes, datapoints.BoundingBox) + assert (output_bboxes[~is_within_crop_area] == 0).all() + + output_masks = output[2] + assert isinstance(output_masks, datapoints.Mask) + + +class TestScaleJitter: + def test__get_params(self, mocker): + spatial_size = (24, 32) + target_size = (16, 12) + scale_range = (0.5, 1.5) + + transform = transforms.ScaleJitter(target_size=target_size, scale_range=scale_range) + sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size) + + n_samples = 5 + for _ in range(n_samples): + + params = transform._get_params([sample]) + + assert "size" in params + size = params["size"] + + assert isinstance(size, tuple) and len(size) == 2 + height, width = size + + r_min = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[0] + r_max = min(target_size[1] / spatial_size[0], target_size[0] / spatial_size[1]) * scale_range[1] + + assert int(spatial_size[0] * r_min) <= height <= int(spatial_size[0] * r_max) + assert int(spatial_size[1] * r_min) <= width <= int(spatial_size[1] * r_max) + + def test__transform(self, mocker): + interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode) + antialias_sentinel = mocker.MagicMock() + + transform = transforms.ScaleJitter( + target_size=(16, 12), interpolation=interpolation_sentinel, antialias=antialias_sentinel + ) + transform._transformed_types = (mocker.MagicMock,) + + size_sentinel = mocker.MagicMock() + mocker.patch( + "torchvision.transforms.v2._geometry.ScaleJitter._get_params", return_value=dict(size=size_sentinel) + ) + + inpt_sentinel = mocker.MagicMock() + + mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize") + transform(inpt_sentinel) + + mock.assert_called_once_with( + inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel + ) + + +class TestRandomShortestSize: + @pytest.mark.parametrize("min_size,max_size", [([5, 9], 20), ([5, 9], None)]) + def test__get_params(self, min_size, max_size, mocker): + spatial_size = (3, 10) + + transform = transforms.RandomShortestSize(min_size=min_size, max_size=max_size) + + sample = mocker.MagicMock(spec=datapoints.Image, num_channels=3, spatial_size=spatial_size) + params = transform._get_params([sample]) + + assert "size" in params + size = params["size"] + + assert isinstance(size, tuple) and len(size) == 2 + + longer = max(size) + shorter = min(size) + if max_size is not None: + assert longer <= max_size + assert shorter <= max_size + else: + assert shorter in min_size + + def test__transform(self, mocker): + interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode) + antialias_sentinel = mocker.MagicMock() + + transform = transforms.RandomShortestSize( + min_size=[3, 5, 7], max_size=12, interpolation=interpolation_sentinel, antialias=antialias_sentinel + ) + transform._transformed_types = (mocker.MagicMock,) + + size_sentinel = mocker.MagicMock() + mocker.patch( + "torchvision.transforms.v2._geometry.RandomShortestSize._get_params", + return_value=dict(size=size_sentinel), + ) + + inpt_sentinel = mocker.MagicMock() + + mock = mocker.patch("torchvision.transforms.v2._geometry.F.resize") + transform(inpt_sentinel) + + mock.assert_called_once_with( + inpt_sentinel, size=size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel + 
) + + +class TestLinearTransformation: + def test_assertions(self): + with pytest.raises(ValueError, match="transformation_matrix should be square"): + transforms.LinearTransformation(torch.rand(2, 3), torch.rand(5)) + + with pytest.raises(ValueError, match="mean_vector should have the same length"): + transforms.LinearTransformation(torch.rand(3, 3), torch.rand(5)) + + @pytest.mark.parametrize( + "inpt", + [ + 122 * torch.ones(1, 3, 8, 8), + 122.0 * torch.ones(1, 3, 8, 8), + datapoints.Image(122 * torch.ones(1, 3, 8, 8)), + PIL.Image.new("RGB", (8, 8), (122, 122, 122)), + ], + ) + def test__transform(self, inpt): + + v = 121 * torch.ones(3 * 8 * 8) + m = torch.ones(3 * 8 * 8, 3 * 8 * 8) + transform = transforms.LinearTransformation(m, v) + + if isinstance(inpt, PIL.Image.Image): + with pytest.raises(TypeError, match="LinearTransformation does not work on PIL Images"): + transform(inpt) + else: + output = transform(inpt) + assert isinstance(output, torch.Tensor) + assert output.unique() == 3 * 8 * 8 + assert output.dtype == inpt.dtype + + +class TestRandomResize: + def test__get_params(self): + min_size = 3 + max_size = 6 + + transform = transforms.RandomResize(min_size=min_size, max_size=max_size) + + for _ in range(10): + params = transform._get_params([]) + + assert isinstance(params["size"], list) and len(params["size"]) == 1 + size = params["size"][0] + + assert min_size <= size < max_size + + def test__transform(self, mocker): + interpolation_sentinel = mocker.MagicMock(spec=InterpolationMode) + antialias_sentinel = mocker.MagicMock() + + transform = transforms.RandomResize( + min_size=-1, max_size=-1, interpolation=interpolation_sentinel, antialias=antialias_sentinel + ) + transform._transformed_types = (mocker.MagicMock,) + + size_sentinel = mocker.MagicMock() + mocker.patch( + "torchvision.transforms.v2._geometry.RandomResize._get_params", + return_value=dict(size=size_sentinel), + ) + + inpt_sentinel = mocker.MagicMock() + + mock_resize = mocker.patch("torchvision.transforms.v2._geometry.F.resize") + transform(inpt_sentinel) + + mock_resize.assert_called_with( + inpt_sentinel, size_sentinel, interpolation=interpolation_sentinel, antialias=antialias_sentinel + ) + + +class TestToDtype: + @pytest.mark.parametrize( + ("dtype", "expected_dtypes"), + [ + ( + torch.float64, + { + datapoints.Video: torch.float64, + datapoints.Image: torch.float64, + datapoints.BoundingBox: torch.float64, + }, + ), + ( + {datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64}, + {datapoints.Video: torch.int32, datapoints.Image: torch.float32, datapoints.BoundingBox: torch.float64}, + ), + ], + ) + def test_call(self, dtype, expected_dtypes): + sample = dict( + video=make_video(dtype=torch.int64), + image=make_image(dtype=torch.uint8), + bounding_box=make_bounding_box(format=datapoints.BoundingBoxFormat.XYXY, dtype=torch.float32), + str="str", + int=0, + ) + + transform = transforms.ToDtype(dtype) + transformed_sample = transform(sample) + + for key, value in sample.items(): + value_type = type(value) + transformed_value = transformed_sample[key] + + # make sure the transformation retains the type + assert isinstance(transformed_value, value_type) + + if isinstance(value, torch.Tensor): + assert transformed_value.dtype is expected_dtypes[value_type] + else: + assert transformed_value is value + + @pytest.mark.filterwarnings("error") + def test_plain_tensor_call(self): + tensor = torch.empty((), dtype=torch.float32) + transform = 
transforms.ToDtype({torch.Tensor: torch.float64}) + + assert transform(tensor).dtype is torch.float64 + + @pytest.mark.parametrize("other_type", [datapoints.Image, datapoints.Video]) + def test_plain_tensor_warning(self, other_type): + with pytest.warns(UserWarning, match=re.escape("`torch.Tensor` will *not* be transformed")): + transforms.ToDtype(dtype={torch.Tensor: torch.float32, other_type: torch.float64}) + + +class TestUniformTemporalSubsample: + @pytest.mark.parametrize( + "inpt", + [ + torch.zeros(10, 3, 8, 8), + torch.zeros(1, 10, 3, 8, 8), + datapoints.Video(torch.zeros(1, 10, 3, 8, 8)), + ], + ) + def test__transform(self, inpt): + num_samples = 5 + transform = transforms.UniformTemporalSubsample(num_samples) + + output = transform(inpt) + assert type(output) is type(inpt) + assert output.shape[-4] == num_samples + assert output.dtype == inpt.dtype + + +# TODO: remove this test in 0.17 when the default of antialias changes to True +def test_antialias_warning(): + pil_img = PIL.Image.new("RGB", size=(10, 10), color=127) + tensor_img = torch.randint(0, 256, size=(3, 10, 10), dtype=torch.uint8) + tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8) + + match = "The default value of the antialias parameter" + with pytest.warns(UserWarning, match=match): + transforms.Resize((20, 20))(tensor_img) + with pytest.warns(UserWarning, match=match): + transforms.RandomResizedCrop((20, 20))(tensor_img) + with pytest.warns(UserWarning, match=match): + transforms.ScaleJitter((20, 20))(tensor_img) + with pytest.warns(UserWarning, match=match): + transforms.RandomShortestSize((20, 20))(tensor_img) + with pytest.warns(UserWarning, match=match): + transforms.RandomResize(10, 20)(tensor_img) + + with pytest.warns(UserWarning, match=match): + transforms.functional.resize(tensor_img, (20, 20)) + with pytest.warns(UserWarning, match=match): + transforms.functional.resize_image_tensor(tensor_img, (20, 20)) + + with pytest.warns(UserWarning, match=match): + transforms.functional.resize(tensor_video, (20, 20)) + with pytest.warns(UserWarning, match=match): + transforms.functional.resize_video(tensor_video, (20, 20)) + + with pytest.warns(UserWarning, match=match): + datapoints.Image(tensor_img).resize((20, 20)) + with pytest.warns(UserWarning, match=match): + datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20)) + + with pytest.warns(UserWarning, match=match): + datapoints.Video(tensor_video).resize((20, 20)) + with pytest.warns(UserWarning, match=match): + datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20)) + + with warnings.catch_warnings(): + warnings.simplefilter("error") + transforms.Resize((20, 20))(pil_img) + transforms.RandomResizedCrop((20, 20))(pil_img) + transforms.ScaleJitter((20, 20))(pil_img) + transforms.RandomShortestSize((20, 20))(pil_img) + transforms.RandomResize(10, 20)(pil_img) + transforms.functional.resize(pil_img, (20, 20)) + + transforms.Resize((20, 20), antialias=True)(tensor_img) + transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img) + transforms.ScaleJitter((20, 20), antialias=True)(tensor_img) + transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img) + transforms.RandomResize(10, 20, antialias=True)(tensor_img) + + transforms.functional.resize(tensor_img, (20, 20), antialias=True) + transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True) + transforms.functional.resize(tensor_video, (20, 20), antialias=True) + transforms.functional.resize_video(tensor_video, (20, 20), 
antialias=True) + + datapoints.Image(tensor_img).resize((20, 20), antialias=True) + datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True) + datapoints.Video(tensor_video).resize((20, 20), antialias=True) + datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True) + + +@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image)) +@pytest.mark.parametrize("label_type", (torch.Tensor, int)) +@pytest.mark.parametrize("dataset_return_type", (dict, tuple)) +@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor)) +def test_classif_preset(image_type, label_type, dataset_return_type, to_tensor): + + image = datapoints.Image(torch.randint(0, 256, size=(1, 3, 250, 250), dtype=torch.uint8)) + if image_type is PIL.Image: + image = to_pil_image(image[0]) + elif image_type is torch.Tensor: + image = image.as_subclass(torch.Tensor) + assert is_simple_tensor(image) + + label = 1 if label_type is int else torch.tensor([1]) + + if dataset_return_type is dict: + sample = { + "image": image, + "label": label, + } + else: + sample = image, label + + t = transforms.Compose( + [ + transforms.RandomResizedCrop((224, 224)), + transforms.RandomHorizontalFlip(p=1), + transforms.RandAugment(), + transforms.TrivialAugmentWide(), + transforms.AugMix(), + transforms.AutoAugment(), + to_tensor(), + # TODO: ConvertImageDtype is a pass-through on PIL images, is that + # intended? This results in a failure if we convert to tensor after + # it, because the image would still be uint8 which make Normalize + # fail. + transforms.ConvertImageDtype(torch.float), + transforms.Normalize(mean=[0, 0, 0], std=[1, 1, 1]), + transforms.RandomErasing(p=1), + ] + ) + + out = t(sample) + + assert type(out) == type(sample) + + if dataset_return_type is tuple: + out_image, out_label = out + else: + assert out.keys() == sample.keys() + out_image, out_label = out.values() + + assert out_image.shape[-2:] == (224, 224) + assert out_label == label + + +@pytest.mark.parametrize("image_type", (PIL.Image, torch.Tensor, datapoints.Image)) +@pytest.mark.parametrize("data_augmentation", ("hflip", "lsj", "multiscale", "ssd", "ssdlite")) +@pytest.mark.parametrize("to_tensor", (transforms.ToTensor, transforms.ToImageTensor)) +@pytest.mark.parametrize("sanitize", (True, False)) +def test_detection_preset(image_type, data_augmentation, to_tensor, sanitize): + torch.manual_seed(0) + if data_augmentation == "hflip": + t = [ + transforms.RandomHorizontalFlip(p=1), + to_tensor(), + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "lsj": + t = [ + transforms.ScaleJitter(target_size=(1024, 1024), antialias=True), + # Note: replaced FixedSizeCrop with RandomCrop, becuase we're + # leaving FixedSizeCrop in prototype for now, and it expects Label + # classes which we won't release yet. 
+ # transforms.FixedSizeCrop( + # size=(1024, 1024), fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0}) + # ), + transforms.RandomCrop((1024, 1024), pad_if_needed=True), + transforms.RandomHorizontalFlip(p=1), + to_tensor(), + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "multiscale": + t = [ + transforms.RandomShortestSize( + min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333, antialias=True + ), + transforms.RandomHorizontalFlip(p=1), + to_tensor(), + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "ssd": + t = [ + transforms.RandomPhotometricDistort(p=1), + transforms.RandomZoomOut(fill=defaultdict(lambda: (123.0, 117.0, 104.0), {datapoints.Mask: 0})), + transforms.RandomIoUCrop(), + transforms.RandomHorizontalFlip(p=1), + to_tensor(), + transforms.ConvertImageDtype(torch.float), + ] + elif data_augmentation == "ssdlite": + t = [ + transforms.RandomIoUCrop(), + transforms.RandomHorizontalFlip(p=1), + to_tensor(), + transforms.ConvertImageDtype(torch.float), + ] + if sanitize: + t += [transforms.SanitizeBoundingBoxes()] + t = transforms.Compose(t) + + num_boxes = 5 + H = W = 250 + + image = datapoints.Image(torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8)) + if image_type is PIL.Image: + image = to_pil_image(image[0]) + elif image_type is torch.Tensor: + image = image.as_subclass(torch.Tensor) + assert is_simple_tensor(image) + + label = torch.randint(0, 10, size=(num_boxes,)) + + boxes = torch.randint(0, min(H, W) // 2, size=(num_boxes, 4)) + boxes[:, 2:] += boxes[:, :2] + boxes = boxes.clamp(min=0, max=min(H, W)) + boxes = datapoints.BoundingBox(boxes, format="XYXY", spatial_size=(H, W)) + + masks = datapoints.Mask(torch.randint(0, 2, size=(num_boxes, H, W), dtype=torch.uint8)) + + sample = { + "image": image, + "label": label, + "boxes": boxes, + "masks": masks, + } + + out = t(sample) + + if to_tensor is transforms.ToTensor and image_type is not datapoints.Image: + assert is_simple_tensor(out["image"]) + else: + assert isinstance(out["image"], datapoints.Image) + assert isinstance(out["label"], type(sample["label"])) + + num_boxes_expected = { + # ssd and ssdlite contain RandomIoUCrop which may "remove" some bbox. It + # doesn't remove them strictly speaking, it just marks some boxes as + # degenerate and those boxes will be later removed by + # SanitizeBoundingBoxes(), which we add to the pipelines if the sanitize + # param is True. + # Note that the values below are probably specific to the random seed + # set above (which is fine). 
+ (True, "ssd"): 4, + (True, "ssdlite"): 4, + }.get((sanitize, data_augmentation), num_boxes) + + assert out["boxes"].shape[0] == out["masks"].shape[0] == out["label"].shape[0] == num_boxes_expected + + +@pytest.mark.parametrize("min_size", (1, 10)) +@pytest.mark.parametrize( + "labels_getter", ("default", "labels", lambda inputs: inputs["labels"], None, lambda inputs: None) +) +def test_sanitize_bounding_boxes(min_size, labels_getter): + H, W = 256, 128 + + boxes_and_validity = [ + ([0, 1, 10, 1], False), # Y1 == Y2 + ([0, 1, 0, 20], False), # X1 == X2 + ([0, 0, min_size - 1, 10], False), # H < min_size + ([0, 0, 10, min_size - 1], False), # W < min_size + ([0, 0, 10, H + 1], False), # Y2 > H + ([0, 0, W + 1, 10], False), # X2 > W + ([-1, 1, 10, 20], False), # any < 0 + ([0, 0, -1, 20], False), # any < 0 + ([0, 0, -10, -1], False), # any < 0 + ([0, 0, min_size, 10], True), # H < min_size + ([0, 0, 10, min_size], True), # W < min_size + ([0, 0, W, H], True), # TODO: Is that actually OK?? Should it be -1? + ([1, 1, 30, 20], True), + ([0, 0, 10, 10], True), + ([1, 1, 30, 20], True), + ] + + random.shuffle(boxes_and_validity) # For test robustness: mix order of wrong and correct cases + boxes, is_valid_mask = zip(*boxes_and_validity) + valid_indices = [i for (i, is_valid) in enumerate(is_valid_mask) if is_valid] + + boxes = torch.tensor(boxes) + labels = torch.arange(boxes.shape[0]) + + boxes = datapoints.BoundingBox( + boxes, + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=(H, W), + ) + + masks = datapoints.Mask(torch.randint(0, 2, size=(boxes.shape[0], H, W))) + + sample = { + "image": torch.randint(0, 256, size=(1, 3, H, W), dtype=torch.uint8), + "labels": labels, + "boxes": boxes, + "whatever": torch.rand(10), + "None": None, + "masks": masks, + } + + out = transforms.SanitizeBoundingBoxes(min_size=min_size, labels_getter=labels_getter)(sample) + + assert out["image"] is sample["image"] + assert out["whatever"] is sample["whatever"] + + if labels_getter is None or (callable(labels_getter) and labels_getter({"labels": "blah"}) is None): + assert out["labels"] is sample["labels"] + else: + assert isinstance(out["labels"], torch.Tensor) + assert out["boxes"].shape[0] == out["labels"].shape[0] == out["masks"].shape[0] + # This works because we conveniently set labels to arange(num_boxes) + assert out["labels"].tolist() == valid_indices + + +@pytest.mark.parametrize("key", ("labels", "LABELS", "LaBeL", "SOME_WEIRD_KEY_THAT_HAS_LABeL_IN_IT")) +def test_sanitize_bounding_boxes_default_heuristic(key): + labels = torch.arange(10) + d = {key: labels} + assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels + + if key.lower() != "labels": + # If "labels" is in the dict (case-insensitive), + # it takes precedence over other keys which would otherwise be a match + d = {key: "something_else", "labels": labels} + assert transforms.SanitizeBoundingBoxes._find_labels_default_heuristic(d) is labels + + +def test_sanitize_bounding_boxes_errors(): + + good_bbox = datapoints.BoundingBox( + [[0, 0, 10, 10]], + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=(20, 20), + ) + + with pytest.raises(ValueError, match="min_size must be >= 1"): + transforms.SanitizeBoundingBoxes(min_size=0) + with pytest.raises(ValueError, match="labels_getter should either be a str"): + transforms.SanitizeBoundingBoxes(labels_getter=12) + + with pytest.raises(ValueError, match="Could not infer where the labels are"): + bad_labels_key = {"bbox": good_bbox, "BAD_KEY": 
torch.arange(good_bbox.shape[0])} + transforms.SanitizeBoundingBoxes()(bad_labels_key) + + with pytest.raises(ValueError, match="If labels_getter is a str or 'default'"): + not_a_dict = (good_bbox, torch.arange(good_bbox.shape[0])) + transforms.SanitizeBoundingBoxes()(not_a_dict) + + with pytest.raises(ValueError, match="must be a tensor"): + not_a_tensor = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0]).tolist()} + transforms.SanitizeBoundingBoxes()(not_a_tensor) + + with pytest.raises(ValueError, match="Number of boxes"): + different_sizes = {"bbox": good_bbox, "labels": torch.arange(good_bbox.shape[0] + 3)} + transforms.SanitizeBoundingBoxes()(different_sizes) + + with pytest.raises(ValueError, match="boxes must be of shape"): + bad_bbox = datapoints.BoundingBox( # batch with 2 elements + [ + [[0, 0, 10, 10]], + [[0, 0, 10, 10]], + ], + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=(20, 20), + ) + different_sizes = {"bbox": bad_bbox, "labels": torch.arange(bad_bbox.shape[0])} + transforms.SanitizeBoundingBoxes()(different_sizes) From 67fdc6d1b9ecd9bf955af86da60fa749bb05fb7f Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 17 Feb 2023 11:03:20 +0100 Subject: [PATCH 04/10] [REVERTME] check if test suite runs without the prototype namespace --- .github/workflows/test-linux-cpu.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml index 5dc7550d868..ec3eca37afb 100644 --- a/.github/workflows/test-linux-cpu.yml +++ b/.github/workflows/test-linux-cpu.yml @@ -54,4 +54,6 @@ jobs: # Run Tests python3 -m torch.utils.collect_env + rm -rf torchvision/prototype + rm test/*prototype* python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20 From f0edac89b29f73cff5961312c1970f0547316e85 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 17 Feb 2023 11:25:16 +0100 Subject: [PATCH 05/10] remove obsolete comment --- test/test_transforms_v2_consistency.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index df4d7a1a60c..125d7ec7a3f 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -1103,12 +1103,6 @@ def make_label(extra_dims, categories): ), (det_transforms.RandomZoomOut(), v2_transforms.RandomZoomOut(), {"with_mask": False}), (det_transforms.ScaleJitter((1024, 1024)), v2_transforms.ScaleJitter((1024, 1024)), {}), - # FIXME - # ( - # det_transforms.FixedSizeCrop((1024, 1024), fill=0), - # prototype_transforms.FixedSizeCrop((1024, 1024), fill=0), - # {}, - # ), ( det_transforms.RandomShortestSize( min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333 From 5d2806d20a6c4ba476c21032b5db0d3487db19f9 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Fri, 17 Feb 2023 12:18:26 +0100 Subject: [PATCH 06/10] try increase macos runner --- .circleci/config.yml | 2 +- .circleci/config.yml.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index a8aedd34670..1f9bec31c5d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -901,7 +901,7 @@ jobs: <<: *binary_common macos: xcode: "14.0" - resource_class: large + resource_class: macos.x86.metal.gen1 steps: - checkout - designate_upload_channel diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in index ab6fa6c35dd..c671a4244ee 100644 --- a/.circleci/config.yml.in +++ b/.circleci/config.yml.in @@ -901,7 
From 67fdc6d1b9ecd9bf955af86da60fa749bb05fb7f Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 11:03:20 +0100
Subject: [PATCH 04/10] [REVERTME] check if test suite runs without the prototype namespace

---
 .github/workflows/test-linux-cpu.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml
index 5dc7550d868..ec3eca37afb 100644
--- a/.github/workflows/test-linux-cpu.yml
+++ b/.github/workflows/test-linux-cpu.yml
@@ -54,4 +54,6 @@ jobs:
 
           # Run Tests
           python3 -m torch.utils.collect_env
+          rm -rf torchvision/prototype
+          rm test/*prototype*
           python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20

From f0edac89b29f73cff5961312c1970f0547316e85 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 11:25:16 +0100
Subject: [PATCH 05/10] remove obsolete comment

---
 test/test_transforms_v2_consistency.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index df4d7a1a60c..125d7ec7a3f 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -1103,12 +1103,6 @@ def make_label(extra_dims, categories):
         ),
         (det_transforms.RandomZoomOut(), v2_transforms.RandomZoomOut(), {"with_mask": False}),
         (det_transforms.ScaleJitter((1024, 1024)), v2_transforms.ScaleJitter((1024, 1024)), {}),
-        # FIXME
-        # (
-        #     det_transforms.FixedSizeCrop((1024, 1024), fill=0),
-        #     prototype_transforms.FixedSizeCrop((1024, 1024), fill=0),
-        #     {},
-        # ),
         (
             det_transforms.RandomShortestSize(
                 min_size=(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800), max_size=1333

From 5d2806d20a6c4ba476c21032b5db0d3487db19f9 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 12:18:26 +0100
Subject: [PATCH 06/10] try increase macos runner

---
 .circleci/config.yml    | 2 +-
 .circleci/config.yml.in | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index a8aedd34670..1f9bec31c5d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -901,7 +901,7 @@ jobs:
     <<: *binary_common
     macos:
       xcode: "14.0"
-    resource_class: large
+    resource_class: macos.x86.metal.gen1
     steps:
       - checkout
       - designate_upload_channel
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index ab6fa6c35dd..c671a4244ee 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -901,7 +901,7 @@ jobs:
     <<: *binary_common
     macos:
       xcode: "14.0"
-    resource_class: large
+    resource_class: macos.x86.metal.gen1
     steps:
       - checkout
       - designate_upload_channel

From a58edc6e3dfc294819a8574c5e4a419f0e3ea6da Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 12:23:29 +0100
Subject: [PATCH 07/10] Revert "[REVERTME] check if test suite runs without the prototype namespace"

This reverts commit 67fdc6d1b9ecd9bf955af86da60fa749bb05fb7f.

---
 .github/workflows/test-linux-cpu.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/workflows/test-linux-cpu.yml b/.github/workflows/test-linux-cpu.yml
index ec3eca37afb..5dc7550d868 100644
--- a/.github/workflows/test-linux-cpu.yml
+++ b/.github/workflows/test-linux-cpu.yml
@@ -54,6 +54,4 @@ jobs:
 
           # Run Tests
           python3 -m torch.utils.collect_env
-          rm -rf torchvision/prototype
-          rm test/*prototype*
           python3 -m pytest --junitxml=test-results/junit.xml -v --durations 20

From eac5c90a48f9f053839ebb1322616ec9c2887380 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 12:46:49 +0100
Subject: [PATCH 08/10] Revert "try increase macos runner"

This reverts commit 5d2806d20a6c4ba476c21032b5db0d3487db19f9.

---
 .circleci/config.yml    | 2 +-
 .circleci/config.yml.in | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 1f9bec31c5d..a8aedd34670 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -901,7 +901,7 @@ jobs:
     <<: *binary_common
     macos:
       xcode: "14.0"
-    resource_class: macos.x86.metal.gen1
+    resource_class: large
     steps:
       - checkout
       - designate_upload_channel
diff --git a/.circleci/config.yml.in b/.circleci/config.yml.in
index c671a4244ee..ab6fa6c35dd 100644
--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -901,7 +901,7 @@ jobs:
     <<: *binary_common
     macos:
       xcode: "14.0"
-    resource_class: macos.x86.metal.gen1
+    resource_class: large
     steps:
       - checkout
       - designate_upload_channel

From a3cb43d01e63438af4f16bb4691cc7f439bdcb27 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 12:50:00 +0100
Subject: [PATCH 09/10] try ignoring v2 tests on macos

---
 .circleci/unittest/linux/scripts/run_test.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh
index 8f6b8cb8485..8cfdde8c8e6 100755
--- a/.circleci/unittest/linux/scripts/run_test.sh
+++ b/.circleci/unittest/linux/scripts/run_test.sh
@@ -6,4 +6,9 @@ eval "$(./conda/bin/conda shell.bash hook)"
 conda activate ./env
 
 python -m torch.utils.collect_env
-pytest --junitxml=test-results/junit.xml -v --durations 20
+
+case "$(uname -s)" in
+    Darwin*) IGNORE='--ignore-glob=test/*v2*';;
+    *) IGNORE=''
+esac
+pytest --junitxml=test-results/junit.xml -v --durations 20 $IGNORE

From 6691acdd7025de90e97abaea4e5e382c206243e4 Mon Sep 17 00:00:00 2001
From: Philip Meier
Date: Fri, 17 Feb 2023 13:58:12 +0100
Subject: [PATCH 10/10] try run v2 tests in a separate step for macos

---
 .circleci/unittest/linux/scripts/run_test.sh | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/.circleci/unittest/linux/scripts/run_test.sh b/.circleci/unittest/linux/scripts/run_test.sh
index 8cfdde8c8e6..5348baa71dd 100755
--- a/.circleci/unittest/linux/scripts/run_test.sh
+++ b/.circleci/unittest/linux/scripts/run_test.sh
@@ -8,7 +8,15 @@ conda activate ./env
 python -m torch.utils.collect_env
 
 case "$(uname -s)" in
-    Darwin*) IGNORE='--ignore-glob=test/*v2*';;
-    *) IGNORE=''
+    Darwin*)
+        # The largest macOS runner is not able to handle the regular test suite plus the transforms v2 tests at the same
+        # time due to insufficient resources. Thus, we ignore the transforms v2 tests at first and run them in a separate
+        # step afterwards.
+        GLOB='test/test_transforms_v2*'
+        pytest --junitxml=test-results/junit.xml -v --durations 20 --ignore-glob="${GLOB}"
+        eval "pytest --junitxml=test-results/junit-transforms-v2.xml -v --durations 20 ${GLOB}"
+        ;;
+    *)
+        pytest --junitxml=test-results/junit.xml -v --durations 20
+        ;;
 esac