From b179c66e7cea963af69bd593ec25e6e076633f66 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Thu, 14 Jul 2022 17:39:59 +0200
Subject: [PATCH 1/3] Added GaussianBlur transform and tests

---
Review notes (ignored by git am):
- GaussianBlur._transform now forwards kernel_size to F.gaussian_blur; the
  functional requires it and the mocked test did not catch its absence.
- GaussianBlur accepts an int sigma as well as a float.
- Line structure was restored from a whitespace-collapsed copy. Indentation of
  unchanged context lines is inferred and the blob ids on the "index" lines
  were not regenerated; use --ignore-whitespace if a context line fails.

 test/test_prototype_transforms.py             | 56 +++++++++++++++++++
 test/test_prototype_transforms_functional.py  | 11 ++++
 torchvision/prototype/features/_feature.py    |  3 +
 torchvision/prototype/features/_image.py      |  6 ++
 torchvision/prototype/transforms/__init__.py  |  5 +-
 torchvision/prototype/transforms/_misc.py     | 48 +++++++++++++++-
 .../transforms/functional/__init__.py         |  7 ++-
 .../prototype/transforms/functional/_misc.py  | 25 ++++++++-
 8 files changed, 157 insertions(+), 4 deletions(-)

diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index d561705fdfe..8430068e817 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -644,3 +644,59 @@ def test_forward(self, padding, pad_if_needed, fill, padding_mode, mocker):
         else:
             # vfdev-5: I do not know how to mock and test this case
             pass
+
+
+class TestGaussianBlur:
+    def test_assertions(self):
+        with pytest.raises(ValueError, match="Kernel size should be a tuple/list of two integers"):
+            transforms.GaussianBlur([10, 12, 14])
+
+        with pytest.raises(ValueError, match="Kernel size value should be an odd and positive number"):
+            transforms.GaussianBlur(4)
+
+        with pytest.raises(TypeError, match="sigma should be a single float or a list/tuple with length 2"):
+            transforms.GaussianBlur(3, sigma=[1, 2, 3])
+
+        with pytest.raises(ValueError, match="If sigma is a single number, it must be positive"):
+            transforms.GaussianBlur(3, sigma=-1.0)
+
+        with pytest.raises(ValueError, match="sigma values should be positive and of the form"):
+            transforms.GaussianBlur(3, sigma=[2.0, 1.0])
+
+    @pytest.mark.parametrize("sigma", [10.0, [10.0, 12.0]])
+    def test__get_params(self, sigma):
+        transform = transforms.GaussianBlur(3, sigma=sigma)
+        params = transform._get_params(None)
+
+        if isinstance(sigma, float):
+            assert params["sigma"][0] == params["sigma"][1] == 10
+        else:
+            assert sigma[0] <= params["sigma"][0] <= sigma[1]
+            assert sigma[0] <= params["sigma"][1] <= sigma[1]
+
+    @pytest.mark.parametrize("kernel_size", [3, [3, 5], (5, 3)])
+    @pytest.mark.parametrize("sigma", [2.0, [2.0, 3.0]])
+    def test__transform(self, kernel_size, sigma, mocker):
+        transform = transforms.GaussianBlur(kernel_size=kernel_size, sigma=sigma)
+
+        if isinstance(kernel_size, (tuple, list)):
+            assert transform.kernel_size == kernel_size
+        else:
+            assert transform.kernel_size == (kernel_size, kernel_size)
+
+        if isinstance(sigma, (tuple, list)):
+            assert transform.sigma == sigma
+        else:
+            assert transform.sigma == (sigma, sigma)
+
+
+        fn = mocker.patch("torchvision.prototype.transforms.functional.gaussian_blur")
+        inpt = features.Image(torch.rand(1, 3, 32, 32))
+        # vfdev-5, Feature Request: let's store params as Transform attribute
+        # This could be also helpful for users
+        torch.manual_seed(12)
+        _ = transform(inpt)
+        torch.manual_seed(12)
+        params = transform._get_params(inpt)
+
+        fn.assert_called_once_with(inpt, kernel_size=transform.kernel_size, **params)
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index c880e8db55b..7e8fb9387f9 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -495,6 +495,7 @@ def center_crop_bounding_box():
         )
 
 
+@register_kernel_info_from_sample_inputs_fn
 def center_crop_segmentation_mask():
     for mask, output_size in itertools.product(
         make_segmentation_masks(image_sizes=((16, 16), (7, 33), (31, 9))),
@@ -503,6 +504,16 @@ def center_crop_segmentation_mask():
         yield SampleInput(mask, output_size)
 
 
+@register_kernel_info_from_sample_inputs_fn
+def gaussian_blur_image_tensor():
+    for image, kernel_size, sigma in itertools.product(
+        make_images(extra_dims=((4,),)),
+        [[3, 3], ],
+        [None, [3.0, 3.0]],
+    ):
+        yield SampleInput(image, kernel_size=kernel_size, sigma=sigma)
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py
index 775f09f2f4b..6013672d7ef 100644
--- a/torchvision/prototype/features/_feature.py
+++ b/torchvision/prototype/features/_feature.py
@@ -189,3 +189,6 @@ def equalize(self) -> Any:
 
     def invert(self) -> Any:
         return self
+
+    def gaussian_blur(self, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Any:
+        return self
diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py
index 447e67b33e9..0abda7b01d8 100644
--- a/torchvision/prototype/features/_image.py
+++ b/torchvision/prototype/features/_image.py
@@ -309,3 +309,9 @@ def invert(self) -> Image:
 
         output = _F.invert_image_tensor(self)
         return Image.new_like(self, output)
+
+    def gaussian_blur(self, kernel_size: List[int], sigma: Optional[List[float]] = None) -> Image:
+        from torchvision.prototype.transforms import functional as _F
+
+        output = _F.gaussian_blur_image_tensor(self, kernel_size=kernel_size, sigma=sigma)
+        return Image.new_like(self, output)
diff --git a/torchvision/prototype/transforms/__init__.py b/torchvision/prototype/transforms/__init__.py
index db1d006336f..f77b36d4643 100644
--- a/torchvision/prototype/transforms/__init__.py
+++ b/torchvision/prototype/transforms/__init__.py
@@ -22,7 +22,10 @@
     RandomAffine,
 )
 from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
-from ._misc import Identity, Normalize, ToDtype, Lambda
+from ._misc import Identity, GaussianBlur, Normalize, ToDtype, Lambda
 from ._type_conversion import DecodeImage, LabelToOneHot
 
 from ._deprecated import Grayscale, RandomGrayscale, ToTensor, ToPILImage, PILToTensor  # usort: skip
+
+# TODO: add RandomPerspective, RandomInvert, RandomPosterize, RandomSolarize,
+# RandomAdjustSharpness, RandomAutocontrast, ElasticTransform
diff --git a/torchvision/prototype/transforms/_misc.py b/torchvision/prototype/transforms/_misc.py
index 54440ee05a5..bb021adf9c8 100644
--- a/torchvision/prototype/transforms/_misc.py
+++ b/torchvision/prototype/transforms/_misc.py
@@ -1,7 +1,8 @@
 import functools
-from typing import Any, List, Type, Callable, Dict
+from typing import Any, List, Type, Callable, Dict, Sequence, Union
 
 import torch
+from torchvision.transforms.transforms import _setup_size
 from torchvision.prototype.transforms import Transform, functional as F
 
 
@@ -46,6 +47,51 @@ def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
         return input
 
 
+class GaussianBlur(Transform):
+    """Blur the input with a Gaussian kernel using a randomly sampled sigma.
+
+    Args:
+        kernel_size: Kernel size as an int or a sequence of ints; every value
+            must be odd and positive.
+        sigma: A single positive number (fixed sigma) or a ``(min, max)``
+            sequence; sigma is drawn uniformly from that range when sampled.
+
+    Raises:
+        ValueError: If a kernel size value is even or non-positive, or if
+            ``sigma`` is non-positive or not ordered as ``(min, max)``.
+        TypeError: If ``sigma`` is neither a number nor a sequence of length 2.
+    """
+
+    def __init__(
+        self, kernel_size: Union[int, Sequence[int]], sigma: Union[float, Sequence[float]] = (0.1, 2.0)
+    ) -> None:
+        super().__init__()
+        self.kernel_size = _setup_size(kernel_size, "Kernel size should be a tuple/list of two integers")
+        for ks in self.kernel_size:
+            if ks <= 0 or ks % 2 == 0:
+                raise ValueError("Kernel size value should be an odd and positive number.")
+
+        if isinstance(sigma, (int, float)):
+            if sigma <= 0:
+                raise ValueError("If sigma is a single number, it must be positive.")
+            sigma = (float(sigma), float(sigma))
+        elif isinstance(sigma, Sequence) and len(sigma) == 2:
+            if not 0.0 < sigma[0] <= sigma[1]:
+                raise ValueError("sigma values should be positive and of the form (min, max).")
+        else:
+            raise TypeError("sigma should be a single float or a list/tuple with length 2 floats.")
+
+        self.sigma = sigma
+
+    def _get_params(self, sample: Any) -> Dict[str, Any]:
+        sigma = torch.empty(1).uniform_(self.sigma[0], self.sigma[1]).item()
+        return dict(sigma=[sigma, sigma])
+
+    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+        # params only carries the sampled sigma; the kernel size is fixed at construction.
+        return F.gaussian_blur(inpt, kernel_size=self.kernel_size, **params)
+
+
 class ToDtype(Lambda):
     def __init__(self, dtype: torch.dtype, *types: Type) -> None:
         self.dtype = dtype
diff --git a/torchvision/prototype/transforms/functional/__init__.py b/torchvision/prototype/transforms/functional/__init__.py
index a8c17577a56..2d2618cf497 100644
--- a/torchvision/prototype/transforms/functional/__init__.py
+++ b/torchvision/prototype/transforms/functional/__init__.py
@@ -99,7 +99,12 @@
     ten_crop_image_tensor,
     ten_crop_image_pil,
 )
-from ._misc import normalize_image_tensor, gaussian_blur_image_tensor
+from ._misc import (
+    normalize_image_tensor,
+    gaussian_blur,
+    gaussian_blur_image_tensor,
+    gaussian_blur_image_pil,
+)
 from ._type_conversion import (
     decode_image_with_pil,
     decode_video_with_av,
diff --git a/torchvision/prototype/transforms/functional/_misc.py b/torchvision/prototype/transforms/functional/_misc.py
index 7b7139a5fd9..e51cac1745e 100644
--- a/torchvision/prototype/transforms/functional/_misc.py
+++ b/torchvision/prototype/transforms/functional/_misc.py
@@ -1,14 +1,28 @@
-from typing import Optional, List
+from typing import Optional, List, Union
 
 import PIL.Image
 import torch
+from torchvision.prototype import features
 from torchvision.transforms import functional_tensor as _FT
 from torchvision.transforms.functional import pil_to_tensor, to_pil_image
 
 
+# shortcut type
+DType = Union[torch.Tensor, PIL.Image.Image, features._Feature]
+
+
 normalize_image_tensor = _FT.normalize
 
 
+def normalize(inpt: DType, mean: List[float], std: List[float], inplace: bool = False) -> DType:
+    if isinstance(inpt, features.Image):
+        return normalize_image_tensor(inpt, mean=mean, std=std, inplace=inplace)
+    elif type(inpt) == torch.Tensor:
+        return normalize_image_tensor(inpt, mean=mean, std=std, inplace=inplace)
+    else:
+        raise TypeError("Unsupported input type")
+
+
 def gaussian_blur_image_tensor(
     img: torch.Tensor, kernel_size: List[int], sigma: Optional[List[float]] = None
 ) -> torch.Tensor:
@@ -42,3 +56,12 @@ def gaussian_blur_image_pil(img: PIL.Image, kernel_size: List[int], sigma: Optio
     t_img = pil_to_tensor(img)
     output = gaussian_blur_image_tensor(t_img, kernel_size=kernel_size, sigma=sigma)
     return to_pil_image(output, mode=img.mode)
+
+
+def gaussian_blur(inpt: DType, kernel_size: List[int], sigma: Optional[List[float]] = None) -> DType:
+    if isinstance(inpt, features._Feature):
+        return inpt.gaussian_blur(kernel_size=kernel_size, sigma=sigma)
+    elif isinstance(inpt, PIL.Image.Image):
+        return gaussian_blur_image_pil(inpt, kernel_size=kernel_size, sigma=sigma)
+    else:
+        return gaussian_blur_image_tensor(inpt, kernel_size=kernel_size, sigma=sigma)

From decc3dde71da72da71cdbf68637f6407a8676ade Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Fri, 15 Jul 2022 12:30:46 +0200
Subject: [PATCH 2/3] Fixing code format

---
 test/test_prototype_transforms.py            | 1 -
 test/test_prototype_transforms_functional.py | 2 +-
 torchvision/prototype/transforms/_misc.py    | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index 8430068e817..d6987f6b71b 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -689,7 +689,6 @@ def test__transform(self, kernel_size, sigma, mocker):
         else:
             assert transform.sigma == (sigma, sigma)
 
-
         fn = mocker.patch("torchvision.prototype.transforms.functional.gaussian_blur")
         inpt = features.Image(torch.rand(1, 3, 32, 32))
         # vfdev-5, Feature Request: let's store params as Transform attribute
diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 7e8fb9387f9..77dad8dd857 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -508,7 +508,7 @@ def center_crop_segmentation_mask():
 def gaussian_blur_image_tensor():
     for image, kernel_size, sigma in itertools.product(
         make_images(extra_dims=((4,),)),
-        [[3, 3], ],
+        [[3, 3]],
         [None, [3.0, 3.0]],
     ):
         yield SampleInput(image, kernel_size=kernel_size, sigma=sigma)
diff --git a/torchvision/prototype/transforms/_misc.py b/torchvision/prototype/transforms/_misc.py
index bb021adf9c8..b8e9101f2a0 100644
--- a/torchvision/prototype/transforms/_misc.py
+++ b/torchvision/prototype/transforms/_misc.py
@@ -2,8 +2,8 @@
 from typing import Any, List, Type, Callable, Dict, Sequence, Union
 
 import torch
-from torchvision.transforms.transforms import _setup_size
 from torchvision.prototype.transforms import Transform, functional as F
+from torchvision.transforms.transforms import _setup_size
 
 
 class Identity(Transform):

From 0c8ceebe7192240b22ce7f6d7950a3046e7a2c54 Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Fri, 15 Jul 2022 13:23:56 +0200
Subject: [PATCH 3/3] Copied correctness test

---
 test/test_prototype_transforms_functional.py | 56 ++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py
index 77dad8dd857..e39eb4b6632 100644
--- a/test/test_prototype_transforms_functional.py
+++ b/test/test_prototype_transforms_functional.py
@@ -1,6 +1,7 @@
 import functools
 import itertools
 import math
+import os
 
 import numpy as np
 import pytest
@@ -1566,3 +1567,58 @@ def _compute_expected_segmentation_mask(mask, output_size):
 
     expected = _compute_expected_segmentation_mask(mask, output_size)
     torch.testing.assert_close(expected, actual)
+
+
+# Copied from test/test_functional_tensor.py
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("image_size", ("small", "large"))
+@pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16])
+@pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)])
+@pytest.mark.parametrize("sigma", [[0.5, 0.5], (0.5, 0.5), (0.8, 0.8), (1.7, 1.7)])
+def test_correctness_gaussian_blur_image_tensor(device, image_size, dt, ksize, sigma):
+    fn = F.gaussian_blur_image_tensor
+
+    # true_cv2_results = {
+    #     # np_img = np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))
+    #     # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.8)
+    #     "3_3_0.8": ...
+    #     # cv2.GaussianBlur(np_img, ksize=(3, 3), sigmaX=0.5)
+    #     "3_3_0.5": ...
+    #     # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.8)
+    #     "3_5_0.8": ...
+    #     # cv2.GaussianBlur(np_img, ksize=(3, 5), sigmaX=0.5)
+    #     "3_5_0.5": ...
+    #     # np_img2 = np.arange(26 * 28, dtype="uint8").reshape((26, 28))
+    #     # cv2.GaussianBlur(np_img2, ksize=(23, 23), sigmaX=1.7)
+    #     "23_23_1.7": ...
+    # }
+    p = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets", "gaussian_blur_opencv_results.pt")
+    true_cv2_results = torch.load(p)
+
+    if image_size == "small":
+        tensor = (
+            torch.from_numpy(np.arange(3 * 10 * 12, dtype="uint8").reshape((10, 12, 3))).permute(2, 0, 1).to(device)
+        )
+    else:
+        tensor = torch.from_numpy(np.arange(26 * 28, dtype="uint8").reshape((1, 26, 28))).to(device)
+
+    if dt == torch.float16 and device == "cpu":
+        # skip float16 on CPU case
+        return
+
+    if dt is not None:
+        tensor = tensor.to(dtype=dt)
+
+    _ksize = (ksize, ksize) if isinstance(ksize, int) else ksize
+    _sigma = sigma[0] if sigma is not None else None
+    shape = tensor.shape
+    gt_key = f"{shape[-2]}_{shape[-1]}_{shape[-3]}__{_ksize[0]}_{_ksize[1]}_{_sigma}"
+    if gt_key not in true_cv2_results:
+        return
+
+    true_out = (
+        torch.tensor(true_cv2_results[gt_key]).reshape(shape[-2], shape[-1], shape[-3]).permute(2, 0, 1).to(tensor)
+    )
+
+    out = fn(tensor, kernel_size=ksize, sigma=sigma)
+    torch.testing.assert_close(out, true_out, rtol=0.0, atol=1.0, msg=f"{ksize}, {sigma}")