From 8d8245b65c389777bab7a7f06699d6bb5124c316 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 7 Jul 2022 12:50:37 +0200 Subject: [PATCH 01/14] Another attempt to add transforms --- .../prototype/features/_bounding_box.py | 9 - torchvision/prototype/features/_feature.py | 9 - torchvision/prototype/features/_image.py | 22 --- torchvision/prototype/features/_label.py | 11 -- .../prototype/features/_segmentation_mask.py | 9 - torchvision/prototype/transforms/_augment.py | 58 ++++-- .../prototype/transforms/_auto_augment.py | 73 ++------ torchvision/prototype/transforms/_color.py | 141 ++++++-------- torchvision/prototype/transforms/_geometry.py | 175 ++++-------------- .../transforms/functional/_geometry.py | 2 +- 10 files changed, 155 insertions(+), 354 deletions(-) diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py index 49bd6eba865..7336b114c34 100644 --- a/torchvision/prototype/features/_bounding_box.py +++ b/torchvision/prototype/features/_bounding_box.py @@ -200,12 +200,3 @@ def perspective( output = _F.perspective_bounding_box(self, self.format, perspective_coeffs) return BoundingBox.new_like(self, output, dtype=output.dtype) - - def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> BoundingBox: - raise TypeError("Erase transformation does not support bounding boxes") - - def mixup(self, lam: float) -> BoundingBox: - raise TypeError("Mixup transformation does not support bounding boxes") - - def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> BoundingBox: - raise TypeError("Cutmix transformation does not support bounding boxes") diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py index e1d7d56d23d..d340273d7d3 100644 --- a/torchvision/prototype/features/_feature.py +++ b/torchvision/prototype/features/_feature.py @@ -186,12 +186,3 @@ def equalize(self) -> Any: def invert(self) -> Any: return self - - def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> Any: - return self - - def mixup(self, lam: float) -> Any: - return self - - def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> Any: - return self diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py index 6acbba38d62..a78ec8cec98 100644 --- a/torchvision/prototype/features/_image.py +++ b/torchvision/prototype/features/_image.py @@ -293,25 +293,3 @@ def invert(self) -> Image: output = _F.invert_image_tensor(self) return Image.new_like(self, output) - - def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> Image: - from torchvision.prototype.transforms import functional as _F - - output = _F.erase_image_tensor(self, i, j, h, w, v) - return Image.new_like(self, output) - - def mixup(self, lam: float) -> Image: - if self.ndim < 4: - raise ValueError("Need a batch of images") - output = self.clone() - output = output.roll(1, -4).mul_(1 - lam).add_(output.mul_(lam)) - return Image.new_like(self, output) - - def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> Image: - if self.ndim < 4: - raise ValueError("Need a batch of images") - x1, y1, x2, y2 = box - image_rolled = self.roll(1, -4) - output = self.clone() - output[..., y1:y2, x1:x2] = image_rolled[..., y1:y2, x1:x2] - return Image.new_like(self, output) diff --git a/torchvision/prototype/features/_label.py b/torchvision/prototype/features/_label.py index 94e22f76f19..38e32f19057 100644 --- a/torchvision/prototype/features/_label.py 
+++ b/torchvision/prototype/features/_label.py @@ -77,14 +77,3 @@ def new_like( return super().new_like( other, data, categories=categories if categories is not None else other.categories, **kwargs ) - - def mixup(self, lam: float) -> OneHotLabel: - if self.ndim < 2: - raise ValueError("Need a batch of one hot labels") - output = self.clone() - output = output.roll(1, -2).mul_(1 - lam).add_(output.mul_(lam)) - return OneHotLabel.new_like(self, output) - - def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> OneHotLabel: - box # unused - return self.mixup(lam_adjusted) diff --git a/torchvision/prototype/features/_segmentation_mask.py b/torchvision/prototype/features/_segmentation_mask.py index 653f0f12ba4..7ea7e015d6c 100644 --- a/torchvision/prototype/features/_segmentation_mask.py +++ b/torchvision/prototype/features/_segmentation_mask.py @@ -113,12 +113,3 @@ def perspective( output = _F.perspective_segmentation_mask(self, perspective_coeffs) return SegmentationMask.new_like(self, output) - - def erase(self, i: int, j: int, h: int, w: int, v: torch.Tensor) -> SegmentationMask: - raise TypeError("Erase transformation does not support segmentation masks") - - def mixup(self, lam: float) -> SegmentationMask: - raise TypeError("Mixup transformation does not support segmentation masks") - - def cutmix(self, box: Tuple[int, int, int, int], lam_adjusted: float) -> SegmentationMask: - raise TypeError("Cutmix transformation does not support segmentation masks") diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py index f4dad53a210..143721f7ee0 100644 --- a/torchvision/prototype/transforms/_augment.py +++ b/torchvision/prototype/transforms/_augment.py @@ -86,16 +86,19 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: return dict(i=i, j=j, h=h, w=w, v=v) def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - if isinstance(inpt, features._Feature): - return inpt.erase(**params) + if isinstance(inpt, (features.Image, torch.Tensor)): + output = F.erase_image_tensor(inpt, **params) + if isinstance(inpt, features.Image): + return features.Image.new_like(inpt, output) + return output elif isinstance(inpt, PIL.Image.Image): # TODO: We should implement a fallback to tensor, like gaussian_blur etc raise RuntimeError("Not implemented") elif isinstance(inpt, torch.Tensor): return F.erase_image_tensor(inpt, **params) - else: - return inpt - + raise TypeError( + "RandomErasing transformation does not support bounding boxes, segmentation masks and plain labels" + ) class _BaseMixupCutmix(Transform): def __init__(self, *, alpha: float) -> None: @@ -110,15 +113,32 @@ def forward(self, *inpts: Any) -> Any: return super().forward(sample) +def _mixup_onehotlabel(inpt: features.OneHotLabel, lam: float) -> features.OneHotLabel: + if inpt.ndim < 2: + raise ValueError("Need a batch of one hot labels") + output = inpt.clone() + output = output.roll(1, -2).mul_(1 - lam).add_(output.mul_(lam)) + return features.OneHotLabel.new_like(inpt, output) + + class RandomMixup(_BaseMixupCutmix): def _get_params(self, sample: Any) -> Dict[str, Any]: return dict(lam=float(self._dist.sample(()))) def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - if isinstance(inpt, features._Feature): - return inpt.mixup(**params) - else: - return inpt + lam = params["lam"] + if isinstance(inpt, features.Image): + if inpt.ndim < 4: + raise ValueError("Need a batch of images") + output = inpt.clone() + output = output.roll(1, -4).mul_(1 - 
lam).add_(output.mul_(lam)) + return features.Image.new_like(inpt, output) + if isinstance(inpt, features.OneHotLabel): + return _mixup_onehotlabel(inpt, lam) + + raise TypeError( + "RandomMixup transformation does not support bounding boxes, segmentation masks and plain labels" + ) class RandomCutmix(_BaseMixupCutmix): @@ -146,7 +166,19 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: return dict(box=box, lam_adjusted=lam_adjusted) def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - if isinstance(inpt, features._Feature): - return inpt.cutmix(**params) - else: - return inpt + if isinstance(inpt, features.Image): + box = params["box"] + if inpt.ndim < 4: + raise ValueError("Need a batch of images") + x1, y1, x2, y2 = box + image_rolled = inpt.roll(1, -4) + output = inpt.clone() + output[..., y1:y2, x1:x2] = image_rolled[..., y1:y2, x1:x2] + return features.Image.new_like(inpt, output) + if isinstance(inpt, features.OneHotLabel): + lam_adjusted = params["lam_adjusted"] + return _mixup_onehotlabel(inpt, lam_adjusted) + + raise TypeError( + "RandomCutmix transformation does not support bounding boxes, segmentation masks and plain labels" + ) diff --git a/torchvision/prototype/transforms/_auto_augment.py b/torchvision/prototype/transforms/_auto_augment.py index 7fc62423ab8..31b24eb3b29 100644 --- a/torchvision/prototype/transforms/_auto_augment.py +++ b/torchvision/prototype/transforms/_auto_augment.py @@ -79,22 +79,6 @@ def _parse_fill( return fill - def _dispatch_image_kernels( - self, - image_tensor_kernel: Callable, - image_pil_kernel: Callable, - input: Any, - *args: Any, - **kwargs: Any, - ) -> Any: - if isinstance(input, features.Image): - output = image_tensor_kernel(input, *args, **kwargs) - return features.Image.new_like(input, output) - elif is_simple_tensor(input): - return image_tensor_kernel(input, *args, **kwargs) - else: # isinstance(input, PIL.Image.Image): - return image_pil_kernel(input, *args, **kwargs) - def _apply_image_transform( self, image: Any, @@ -106,9 +90,7 @@ def _apply_image_transform( if transform_id == "Identity": return image elif transform_id == "ShearX": - return self._dispatch_image_kernels( - F.affine_image_tensor, - F.affine_image_pil, + return F.affine( image, angle=0.0, translate=[0, 0], @@ -118,9 +100,7 @@ def _apply_image_transform( fill=fill, ) elif transform_id == "ShearY": - return self._dispatch_image_kernels( - F.affine_image_tensor, - F.affine_image_pil, + return F.affine( image, angle=0.0, translate=[0, 0], @@ -130,9 +110,7 @@ def _apply_image_transform( fill=fill, ) elif transform_id == "TranslateX": - return self._dispatch_image_kernels( - F.affine_image_tensor, - F.affine_image_pil, + return F.affine( image, angle=0.0, translate=[int(magnitude), 0], @@ -142,9 +120,7 @@ def _apply_image_transform( fill=fill, ) elif transform_id == "TranslateY": - return self._dispatch_image_kernels( - F.affine_image_tensor, - F.affine_image_pil, + return F.affine( image, angle=0.0, translate=[0, int(magnitude)], @@ -154,46 +130,25 @@ def _apply_image_transform( fill=fill, ) elif transform_id == "Rotate": - return self._dispatch_image_kernels(F.rotate_image_tensor, F.rotate_image_pil, image, angle=magnitude) + return F.rotate(image, angle=magnitude) elif transform_id == "Brightness": - return self._dispatch_image_kernels( - F.adjust_brightness_image_tensor, - F.adjust_brightness_image_pil, - image, - brightness_factor=1.0 + magnitude, - ) + return F.adjust_brightness(image, brightness_factor=1.0 + magnitude) elif transform_id == "Color": 
- return self._dispatch_image_kernels( - F.adjust_saturation_image_tensor, - F.adjust_saturation_image_pil, - image, - saturation_factor=1.0 + magnitude, - ) + return F.adjust_saturation(image, saturation_factor=1.0 + magnitude) elif transform_id == "Contrast": - return self._dispatch_image_kernels( - F.adjust_contrast_image_tensor, F.adjust_contrast_image_pil, image, contrast_factor=1.0 + magnitude - ) + return F.adjust_contrast(image, contrast_factor=1.0 + magnitude) elif transform_id == "Sharpness": - return self._dispatch_image_kernels( - F.adjust_sharpness_image_tensor, - F.adjust_sharpness_image_pil, - image, - sharpness_factor=1.0 + magnitude, - ) + return F.adjust_sharpness(image, sharpness_factor=1.0 + magnitude) elif transform_id == "Posterize": - return self._dispatch_image_kernels( - F.posterize_image_tensor, F.posterize_image_pil, image, bits=int(magnitude) - ) + return F.posterize(image, bits=int(magnitude)) elif transform_id == "Solarize": - return self._dispatch_image_kernels( - F.solarize_image_tensor, F.solarize_image_pil, image, threshold=magnitude - ) + return F.solarize(image, threshold=magnitude) elif transform_id == "AutoContrast": - return self._dispatch_image_kernels(F.autocontrast_image_tensor, F.autocontrast_image_pil, image) + return F.autocontrast(image) elif transform_id == "Equalize": - return self._dispatch_image_kernels(F.equalize_image_tensor, F.equalize_image_pil, image) + return F.equalize(image) elif transform_id == "Invert": - return self._dispatch_image_kernels(F.invert_image_tensor, F.invert_image_pil, image) + return F.invert(image) else: raise ValueError(f"No transform available for {transform_id}") diff --git a/torchvision/prototype/transforms/_color.py b/torchvision/prototype/transforms/_color.py index 960020baff8..60fe46ed9ea 100644 --- a/torchvision/prototype/transforms/_color.py +++ b/torchvision/prototype/transforms/_color.py @@ -1,5 +1,4 @@ import collections.abc -import functools from typing import Any, Dict, Union, Tuple, Optional, Sequence, Callable, TypeVar import PIL.Image @@ -55,74 +54,52 @@ def _check_input( def _image_transform( self, - input: T, + inpt: T, *, kernel_tensor: Callable[..., torch.Tensor], kernel_pil: Callable[..., PIL.Image.Image], **kwargs: Any, ) -> T: - if isinstance(input, features.Image): - output = kernel_tensor(input, **kwargs) - return features.Image.new_like(input, output) - elif is_simple_tensor(input): - return kernel_tensor(input, **kwargs) - elif isinstance(input, PIL.Image.Image): - return kernel_pil(input, **kwargs) # type: ignore[no-any-return] + if isinstance(inpt, features.Image): + output = kernel_tensor(inpt, **kwargs) + return features.Image.new_like(inpt, output) + elif is_simple_tensor(inpt): + return kernel_tensor(inpt, **kwargs) + elif isinstance(inpt, PIL.Image.Image): + return kernel_pil(inpt, **kwargs) # type: ignore[no-any-return] else: raise RuntimeError + @staticmethod + def _generate_value(left: float, right: float) -> float: + return float(torch.distributions.Uniform(left, right).sample()) + def _get_params(self, sample: Any) -> Dict[str, Any]: - image_transforms = [] - if self.brightness is not None: - image_transforms.append( - functools.partial( - self._image_transform, - kernel_tensor=F.adjust_brightness_image_tensor, - kernel_pil=F.adjust_brightness_image_pil, - brightness_factor=float( - torch.distributions.Uniform(self.brightness[0], self.brightness[1]).sample() - ), - ) - ) - if self.contrast is not None: - image_transforms.append( - functools.partial( - self._image_transform, - 
kernel_tensor=F.adjust_contrast_image_tensor, - kernel_pil=F.adjust_contrast_image_pil, - contrast_factor=float(torch.distributions.Uniform(self.contrast[0], self.contrast[1]).sample()), - ) - ) - if self.saturation is not None: - image_transforms.append( - functools.partial( - self._image_transform, - kernel_tensor=F.adjust_saturation_image_tensor, - kernel_pil=F.adjust_saturation_image_pil, - saturation_factor=float( - torch.distributions.Uniform(self.saturation[0], self.saturation[1]).sample() - ), - ) - ) - if self.hue is not None: - image_transforms.append( - functools.partial( - self._image_transform, - kernel_tensor=F.adjust_hue_image_tensor, - kernel_pil=F.adjust_hue_image_pil, - hue_factor=float(torch.distributions.Uniform(self.hue[0], self.hue[1]).sample()), - ) - ) - - return dict(image_transforms=[image_transforms[idx] for idx in torch.randperm(len(image_transforms))]) - - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if not (isinstance(input, (features.Image, PIL.Image.Image)) or is_simple_tensor(input)): - return input - - for transform in params["image_transforms"]: - input = transform(input) - return input + fn_idx = torch.randperm(4) + + b = None if self.brightness is None else self._generate_value(self.brightness[0], self.brightness[1]) + c = None if self.contrast is None else self._generate_value(self.contrast[0], self.contrast[1]) + s = None if self.saturation is None else self._generate_value(self.saturation[0], self.saturation[1]) + h = None if self.hue is None else self._generate_value(self.hue[0], self.hue[1]) + + return dict(fn_idx=fn_idx, brightness_factor=b, contrast_factor=c, saturation_factor=s, hue_factor=h) + + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + output = inpt + brightness_factor = params["brightness_factor"] + contrast_factor = params["contrast_factor"] + saturation_factor = params["saturation_factor"] + hue_factor = params["hue_factor"] + for fn_id in params["fn_idx"]: + if fn_id == 0 and brightness_factor is not None: + output = F.adjust_brightness(output, brightness_factor=brightness_factor) + elif fn_id == 1 and contrast_factor is not None: + output = F.adjust_contrast(output, contrast_factor=contrast_factor) + elif fn_id == 2 and saturation_factor is not None: + output = F.adjust_saturation(output, saturation_factor=saturation_factor) + elif fn_id == 3 and hue_factor is not None: + output = F.adjust_hue(output, hue_factor=hue_factor) + return output class _RandomChannelShuffle(Transform): @@ -131,19 +108,19 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: num_channels, _, _ = get_image_dimensions(image) return dict(permutation=torch.randperm(num_channels)) - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if not (isinstance(input, (features.Image, PIL.Image.Image)) or is_simple_tensor(input)): - return input + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + if not (isinstance(inpt, (features.Image, PIL.Image.Image)) or is_simple_tensor(inpt)): + return inpt - image = input - if isinstance(input, PIL.Image.Image): + image = inpt + if isinstance(inpt, PIL.Image.Image): image = _F.pil_to_tensor(image) output = image[..., params["permutation"], :, :] - if isinstance(input, features.Image): - output = features.Image.new_like(input, output, color_space=features.ColorSpace.OTHER) - elif isinstance(input, PIL.Image.Image): + if isinstance(inpt, features.Image): + output = features.Image.new_like(inpt, output, color_space=features.ColorSpace.OTHER) + elif 
isinstance(inpt, PIL.Image.Image): output = _F.to_pil_image(output) return output @@ -175,33 +152,25 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: contrast_before=torch.rand(()) < 0.5, ) - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: if params["brightness"]: - input = self._brightness(input) + inpt = self._brightness(inpt) if params["contrast1"] and params["contrast_before"]: - input = self._contrast(input) + inpt = self._contrast(inpt) if params["saturation"]: - input = self._saturation(input) + inpt = self._saturation(inpt) if params["saturation"]: - input = self._saturation(input) + inpt = self._saturation(inpt) if params["contrast2"] and not params["contrast_before"]: - input = self._contrast(input) + inpt = self._contrast(inpt) if params["channel_shuffle"]: - input = self._channel_shuffle(input) - return input + inpt = self._channel_shuffle(inpt) + return inpt class RandomEqualize(_RandomApplyTransform): def __init__(self, p: float = 0.5): super().__init__(p=p) - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.equalize_image_tensor(input) - return features.Image.new_like(input, output) - elif is_simple_tensor(input): - return F.equalize_image_tensor(input) - elif isinstance(input, PIL.Image.Image): - return F.equalize_image_pil(input) - else: - return input + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.equalize(inpt) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 0487a71416e..45699f0993e 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -2,14 +2,14 @@ import math import numbers import warnings -from typing import Any, Dict, List, Union, Sequence, Tuple, cast +from typing import Any, Dict, List, Optional, Union, Sequence, Tuple, cast import PIL.Image import torch from torchvision.prototype import features from torchvision.prototype.transforms import Transform, functional as F from torchvision.transforms.functional import pil_to_tensor, InterpolationMode -from torchvision.transforms.transforms import _setup_size, _interpolation_modes_from_int +from torchvision.transforms.transforms import _setup_size from typing_extensions import Literal from ._transform import _RandomApplyTransform @@ -17,41 +17,13 @@ class RandomHorizontalFlip(_RandomApplyTransform): - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.horizontal_flip_image_tensor(input) - return features.Image.new_like(input, output) - elif isinstance(input, features.SegmentationMask): - output = F.horizontal_flip_segmentation_mask(input) - return features.SegmentationMask.new_like(input, output) - elif isinstance(input, features.BoundingBox): - output = F.horizontal_flip_bounding_box(input, format=input.format, image_size=input.image_size) - return features.BoundingBox.new_like(input, output) - elif isinstance(input, PIL.Image.Image): - return F.horizontal_flip_image_pil(input) - elif is_simple_tensor(input): - return F.horizontal_flip_image_tensor(input) - else: - return input + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.horizontal_flip(inpt) class RandomVerticalFlip(_RandomApplyTransform): - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = 
F.vertical_flip_image_tensor(input) - return features.Image.new_like(input, output) - elif isinstance(input, features.SegmentationMask): - output = F.vertical_flip_segmentation_mask(input) - return features.SegmentationMask.new_like(input, output) - elif isinstance(input, features.BoundingBox): - output = F.vertical_flip_bounding_box(input, format=input.format, image_size=input.image_size) - return features.BoundingBox.new_like(input, output) - elif isinstance(input, PIL.Image.Image): - return F.vertical_flip_image_pil(input) - elif is_simple_tensor(input): - return F.vertical_flip_image_tensor(input) - else: - return input + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.vertical_flip(inpt) class Resize(Transform): @@ -59,27 +31,23 @@ def __init__( self, size: Union[int, Sequence[int]], interpolation: InterpolationMode = InterpolationMode.BILINEAR, + max_size: Optional[int] = None, + antialias: Optional[bool] = None, ) -> None: super().__init__() self.size = [size] if isinstance(size, int) else list(size) self.interpolation = interpolation + self.max_size = max_size + self.antialias = antialias - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.resize_image_tensor(input, self.size, interpolation=self.interpolation) - return features.Image.new_like(input, output) - elif isinstance(input, features.SegmentationMask): - output = F.resize_segmentation_mask(input, self.size) - return features.SegmentationMask.new_like(input, output) - elif isinstance(input, features.BoundingBox): - output = F.resize_bounding_box(input, self.size, image_size=input.image_size) - return features.BoundingBox.new_like(input, output, image_size=cast(Tuple[int, int], tuple(self.size))) - elif isinstance(input, PIL.Image.Image): - return F.resize_image_pil(input, self.size, interpolation=self.interpolation) - elif is_simple_tensor(input): - return F.resize_image_tensor(input, self.size, interpolation=self.interpolation) - else: - return input + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.resize( + inpt, + self.size, + interpolation=self.interpolation, + max_size=self.max_size, + antialias=self.antialias, + ) class CenterCrop(Transform): @@ -87,22 +55,8 @@ def __init__(self, output_size: List[int]): super().__init__() self.output_size = output_size - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.center_crop_image_tensor(input, self.output_size) - return features.Image.new_like(input, output) - elif is_simple_tensor(input): - return F.center_crop_image_tensor(input, self.output_size) - elif isinstance(input, PIL.Image.Image): - return F.center_crop_image_pil(input, self.output_size) - else: - return input - - def forward(self, *inputs: Any) -> Any: - sample = inputs if len(inputs) > 1 else inputs[0] - if has_any(sample, features.BoundingBox, features.SegmentationMask): - raise TypeError(f"BoundingBox'es and SegmentationMask's are not supported by {type(self).__name__}()") - return super().forward(sample) + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.center_crop(inpt, output_size=self.output_size) class RandomResizedCrop(Transform): @@ -112,6 +66,7 @@ def __init__( scale: Tuple[float, float] = (0.08, 1.0), ratio: Tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0), interpolation: InterpolationMode = InterpolationMode.BILINEAR, + antialias: Optional[bool] = None, ) -> None: super().__init__() self.size 
= _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.") @@ -125,20 +80,16 @@ def __init__( if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): warnings.warn("Scale and ratio should be of kind (min, max)") - # Backward compatibility with integer value - if isinstance(interpolation, int): - warnings.warn( - "Argument interpolation should be of type InterpolationMode instead of int. " - "Please, use InterpolationMode enum." - ) - interpolation = _interpolation_modes_from_int(interpolation) - self.size = size self.scale = scale self.ratio = ratio self.interpolation = interpolation + self.antialias = antialias def _get_params(self, sample: Any) -> Dict[str, Any]: + # vfdev-5: techically, this op can work on bboxes/segm masks only inputs without image in samples + # What if we have multiple images/bboxes/masks of different sizes ? + # TODO: let's support bbox or mask in samples without image image = query_image(sample) _, height, width = get_image_dimensions(image) area = height * width @@ -177,24 +128,10 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: return dict(top=i, left=j, height=h, width=w) - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image): - output = F.resized_crop_image_tensor( - input, **params, size=list(self.size), interpolation=self.interpolation - ) - return features.Image.new_like(input, output) - elif is_simple_tensor(input): - return F.resized_crop_image_tensor(input, **params, size=list(self.size), interpolation=self.interpolation) - elif isinstance(input, PIL.Image.Image): - return F.resized_crop_image_pil(input, **params, size=list(self.size), interpolation=self.interpolation) - else: - return input - - def forward(self, *inputs: Any) -> Any: - sample = inputs if len(inputs) > 1 else inputs[0] - if has_any(sample, features.BoundingBox, features.SegmentationMask): - raise TypeError(f"BoundingBox'es and SegmentationMask's are not supported by {type(self).__name__}()") - return super().forward(sample) + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.resized_crop( + inpt, **params, size=self.size, interpolation=self.interpolation, antialias=self.antialias + ) class MultiCropResult(list): @@ -294,7 +231,7 @@ def __init__( if not isinstance(padding, (numbers.Number, tuple, list)): raise TypeError("Got inappropriate padding arg") - if not isinstance(fill, (numbers.Number, str, tuple, list)): + if not isinstance(fill, (numbers.Number, tuple, list)): raise TypeError("Got inappropriate fill arg") if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: @@ -309,45 +246,9 @@ def __init__( self.fill = fill self.padding_mode = padding_mode - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - if isinstance(input, features.Image) or is_simple_tensor(input): - # PyTorch's pad supports only integers on fill. 
So we need to overwrite the colour - output = F.pad_image_tensor(input, params["padding"], fill=0, padding_mode="constant") - - left, top, right, bottom = params["padding"] - fill = torch.tensor(params["fill"], dtype=input.dtype, device=input.device).to().view(-1, 1, 1) - - if top > 0: - output[..., :top, :] = fill - if left > 0: - output[..., :, :left] = fill - if bottom > 0: - output[..., -bottom:, :] = fill - if right > 0: - output[..., :, -right:] = fill - - if isinstance(input, features.Image): - output = features.Image.new_like(input, output) - - return output - elif isinstance(input, PIL.Image.Image): - return F.pad_image_pil( - input, - params["padding"], - fill=tuple(int(v) if input.mode != "F" else v for v in params["fill"]), - padding_mode="constant", - ) - elif isinstance(input, features.BoundingBox): - output = F.pad_bounding_box(input, params["padding"], format=input.format) - - left, top, right, bottom = params["padding"] - height, width = input.image_size - height += top + bottom - width += left + right - - return features.BoundingBox.new_like(input, output, image_size=(height, width)) - else: - return input + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + # TODO: Fix typing error + return F.pad(inpt, padding=self.padding, fill=self.fill, padding_mode=self.padding_mode) # type: ignore[arg-type] class RandomZoomOut(_RandomApplyTransform): @@ -364,6 +265,8 @@ def __init__( if side_range[0] < 1.0 or side_range[0] > side_range[1]: raise ValueError(f"Invalid canvas side range provided {side_range}.") + self._pad_op = Pad(0, padding_mode="constant") + def _get_params(self, sample: Any) -> Dict[str, Any]: image = query_image(sample) orig_c, orig_h, orig_w = get_image_dimensions(image) @@ -385,6 +288,8 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: return dict(padding=padding, fill=fill) - def _transform(self, input: Any, params: Dict[str, Any]) -> Any: - transform = Pad(**params, padding_mode="constant") - return transform(input) + def forward(self, *inputs: Any) -> Any: + params = self._get_params(inputs) + self._pad_op.padding = params["padding"] + self._pad_op.fill = params["fill"] + return self._pad_op(*inputs) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 5044441d612..9ffdfaff055 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -521,7 +521,7 @@ def _pad_with_vector_fill( raise ValueError(f"Padding mode '{padding_mode}' is not supported if fill is not scalar") output = pad_image_tensor(img, padding, fill=0, padding_mode="constant") - left, top, right, bottom = padding + left, top, right, bottom = _FT._parse_pad_padding(padding) fill = torch.tensor(fill, dtype=img.dtype, device=img.device).view(-1, 1, 1) if top > 0: From a4ce05f6cff11dca949ae55a6a4807d66a59b5ca Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 7 Jul 2022 16:23:20 +0200 Subject: [PATCH 02/14] Fixed padding type hint --- test/test_functional_tensor.py | 13 +------------ test/test_prototype_transforms_functional.py | 2 +- torchvision/prototype/features/_bounding_box.py | 13 ++++++++++--- torchvision/prototype/features/_feature.py | 5 ++++- torchvision/prototype/features/_image.py | 9 ++++++++- .../prototype/features/_segmentation_mask.py | 9 ++++++++- torchvision/prototype/transforms/_geometry.py | 3 +-- .../transforms/functional/_geometry.py | 17 ++++++++++++----- torchvision/transforms/functional_tensor.py | 4 
++-- 9 files changed, 47 insertions(+), 28 deletions(-) diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 649fc5ed1b0..969aedf6d2d 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -955,18 +955,7 @@ def test_adjust_gamma(device, dtype, config, channels): @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize( - "pad", - [ - 2, - [ - 3, - ], - [0, 3], - (3, 3), - [4, 2, 4, 3], - ], -) +@pytest.mark.parametrize("pad", [2, [3], [0, 3], (3, 3), [4, 2, 4, 3]]) @pytest.mark.parametrize( "config", [ diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py index d4fb3136ff4..c880e8db55b 100644 --- a/test/test_prototype_transforms_functional.py +++ b/test/test_prototype_transforms_functional.py @@ -317,7 +317,7 @@ def rotate_image_tensor(): [-87, 15, 90], # angle [True, False], # expand [None, [12, 23]], # center - [None, [128]], # fill + [None, [128], [12.0]], # fill ): if center is not None and expand: # Skip warning: The provided center argument is ignored if expand is True diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py index 7336b114c34..a359dd6bd8f 100644 --- a/torchvision/prototype/features/_bounding_box.py +++ b/torchvision/prototype/features/_bounding_box.py @@ -128,13 +128,20 @@ def resized_crop( return BoundingBox.new_like(self, output, image_size=image_size, dtype=output.dtype) def pad( - self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant" + self, + padding: Union[int, Sequence[int]], + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + padding_mode: str = "constant", ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F if padding_mode not in ["constant"]: raise ValueError(f"Padding mode '{padding_mode}' is not supported with bounding boxes") + # This cast does Sequence[int] -> List[int] and is required to make mypy happy + if not isinstance(padding, int): + padding = list(padding) + output = _F.pad_bounding_box(self, padding, format=self.format) # Update output image size: @@ -153,7 +160,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F @@ -173,7 +180,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py index d340273d7d3..4a766253294 100644 --- a/torchvision/prototype/features/_feature.py +++ b/torchvision/prototype/features/_feature.py @@ -120,7 +120,10 @@ def resized_crop( return self def pad( - self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant" + self, + padding: Union[int, Sequence[int]], + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + padding_mode: str = "constant", ) -> Any: return self diff --git 
a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py index a78ec8cec98..b2a3fb3d972 100644 --- a/torchvision/prototype/features/_image.py +++ b/torchvision/prototype/features/_image.py @@ -164,10 +164,17 @@ def resized_crop( return Image.new_like(self, output) def pad( - self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant" + self, + padding: Union[int, Sequence[int]], + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + padding_mode: str = "constant", ) -> Image: from torchvision.prototype.transforms import functional as _F + # This cast does Sequence[int] -> List[int] and is required to make mypy happy + if not isinstance(padding, int): + padding = list(padding) + # PyTorch's pad supports only scalars on fill. So we need to overwrite the colour if isinstance(fill, (int, float)): output = _F.pad_image_tensor(self, padding, fill=fill, padding_mode=padding_mode) diff --git a/torchvision/prototype/features/_segmentation_mask.py b/torchvision/prototype/features/_segmentation_mask.py index 7ea7e015d6c..d2859b08707 100644 --- a/torchvision/prototype/features/_segmentation_mask.py +++ b/torchvision/prototype/features/_segmentation_mask.py @@ -61,10 +61,17 @@ def resized_crop( return SegmentationMask.new_like(self, output) def pad( - self, padding: List[int], fill: Union[int, float, Sequence[float]] = 0, padding_mode: str = "constant" + self, + padding: Union[int, Sequence[int]], + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + padding_mode: str = "constant", ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F + # This cast does Sequence[int] -> List[int] and is required to make mypy happy + if not isinstance(padding, int): + padding = list(padding) + output = _F.pad_segmentation_mask(self, padding, padding_mode=padding_mode) return SegmentationMask.new_like(self, output) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index 45699f0993e..e0a6adc38ee 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -247,8 +247,7 @@ def __init__( self.padding_mode = padding_mode def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: - # TODO: Fix typing error - return F.pad(inpt, padding=self.padding, fill=self.fill, padding_mode=self.padding_mode) # type: ignore[arg-type] + return F.pad(inpt, padding=self.padding, fill=self.fill, padding_mode=self.padding_mode) class RandomZoomOut(_RandomApplyTransform): diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 9ffdfaff055..65f24c5055b 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -497,7 +497,7 @@ def rotate( def pad_image_tensor( - img: torch.Tensor, padding: List[int], fill: Union[int, float] = 0, padding_mode: str = "constant" + img: torch.Tensor, padding: Union[int, List[int]], fill: Union[int, float] = 0, padding_mode: str = "constant" ) -> torch.Tensor: num_masks, height, width = img.shape[-3:] extra_dims = img.shape[:-3] @@ -513,7 +513,7 @@ def pad_image_tensor( # TODO: This should be removed once pytorch pad supports non-scalar padding values def _pad_with_vector_fill( img: torch.Tensor, - padding: List[int], + padding: Union[int, List[int]], fill: Sequence[float] = [0.0], padding_mode: str = "constant", ) -> 
torch.Tensor: @@ -536,7 +536,7 @@ def _pad_with_vector_fill( def pad_segmentation_mask( - segmentation_mask: torch.Tensor, padding: List[int], padding_mode: str = "constant" + segmentation_mask: torch.Tensor, padding: Union[int, List[int]], padding_mode: str = "constant" ) -> torch.Tensor: num_masks, height, width = segmentation_mask.shape[-3:] extra_dims = segmentation_mask.shape[:-3] @@ -550,7 +550,7 @@ def pad_segmentation_mask( def pad_bounding_box( - bounding_box: torch.Tensor, padding: List[int], format: features.BoundingBoxFormat + bounding_box: torch.Tensor, padding: Union[int, List[int]], format: features.BoundingBoxFormat ) -> torch.Tensor: left, _, top, _ = _FT._parse_pad_padding(padding) @@ -566,13 +566,20 @@ def pad_bounding_box( def pad( - inpt: DType, padding: List[int], fill: Union[int, float, Sequence[float]] = 0.0, padding_mode: str = "constant" + inpt: DType, + padding: Union[int, Sequence[int]], + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + padding_mode: str = "constant" ) -> DType: if isinstance(inpt, features._Feature): return inpt.pad(padding, fill=fill, padding_mode=padding_mode) if isinstance(inpt, PIL.Image.Image): return pad_image_pil(inpt, padding, fill=fill, padding_mode=padding_mode) + # This cast does Sequence[int] -> List[int] and is required to make mypy happy + if not isinstance(padding, int): + padding = list(padding) + # TODO: PyTorch's pad supports only scalars on fill. So we need to overwrite the colour if isinstance(fill, (int, float)): return pad_image_tensor(inpt, padding, fill=fill, padding_mode=padding_mode) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 50e41647af1..5953fe50dcd 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -350,7 +350,7 @@ def _pad_symmetric(img: Tensor, padding: List[int]) -> Tensor: raise RuntimeError("Symmetric padding of N-D tensors are not supported yet") -def _parse_pad_padding(padding: List[int]) -> List[int]: +def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: if isinstance(padding, int): if torch.jit.is_scripting(): # This maybe unreachable @@ -370,7 +370,7 @@ def _parse_pad_padding(padding: List[int]) -> List[int]: return [pad_left, pad_right, pad_top, pad_bottom] -def pad(img: Tensor, padding: List[int], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor: +def pad(img: Tensor, padding: Union[int, List[int]], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor: _assert_image_tensor(img) if not isinstance(padding, (int, tuple, list)): From 5a42b64edfd08c002b27df32a953abe4b6c098aa Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 7 Jul 2022 17:03:51 +0200 Subject: [PATCH 03/14] Fixed fill arg for pad and rotate, affine --- .../prototype/features/_bounding_box.py | 4 +- torchvision/prototype/features/_feature.py | 4 +- torchvision/prototype/features/_image.py | 20 ++++++- .../prototype/features/_segmentation_mask.py | 4 +- .../prototype/transforms/_auto_augment.py | 23 ++++---- torchvision/prototype/transforms/_geometry.py | 55 ++++++++++++++++--- .../transforms/functional/_geometry.py | 26 +++++++-- torchvision/transforms/functional_pil.py | 4 +- 8 files changed, 109 insertions(+), 31 deletions(-) diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py index a359dd6bd8f..319e6d80927 100644 --- a/torchvision/prototype/features/_bounding_box.py +++ 
b/torchvision/prototype/features/_bounding_box.py @@ -160,7 +160,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[float]] = 0, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F @@ -180,7 +180,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[float]] = 0, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py index 4a766253294..e6c4e55fd6c 100644 --- a/torchvision/prototype/features/_feature.py +++ b/torchvision/prototype/features/_feature.py @@ -132,7 +132,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> Any: return self @@ -144,7 +144,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> Any: return self diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py index b2a3fb3d972..62b17140725 100644 --- a/torchvision/prototype/features/_image.py +++ b/torchvision/prototype/features/_image.py @@ -190,11 +190,19 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> Image: from torchvision.prototype.transforms import functional as _F + # This cast does Sequence -> List[float] to please mypy and torch.jit.script + if not isinstance(fill, (int, float)): + fill = [float(v) for v in list(fill)] + + if isinstance(fill, (int, float)): + # It is OK to cast int to float as later we use inpt.dtype + fill = [float(fill)] + output = _F.rotate_image_tensor( self, angle, interpolation=interpolation, expand=expand, fill=fill, center=center ) @@ -207,11 +215,19 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> Image: from torchvision.prototype.transforms import functional as _F + # This cast does Sequence -> List[float] to please mypy and torch.jit.script + if not isinstance(fill, (int, float)): + fill = [float(v) for v in list(fill)] + + if isinstance(fill, (int, float)): + # It is OK to cast int to float as later we use inpt.dtype + fill = [float(fill)] + output = _F.affine_image_tensor( self, angle, diff --git a/torchvision/prototype/features/_segmentation_mask.py b/torchvision/prototype/features/_segmentation_mask.py index d2859b08707..765f48be63e 100644 --- a/torchvision/prototype/features/_segmentation_mask.py +++ b/torchvision/prototype/features/_segmentation_mask.py @@ -80,7 +80,7 @@ def rotate( angle: float, 
interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F @@ -95,7 +95,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F diff --git a/torchvision/prototype/transforms/_auto_augment.py b/torchvision/prototype/transforms/_auto_augment.py index 31b24eb3b29..8508f3549f0 100644 --- a/torchvision/prototype/transforms/_auto_augment.py +++ b/torchvision/prototype/transforms/_auto_augment.py @@ -1,5 +1,5 @@ import math -from typing import Any, Dict, Tuple, Optional, Callable, List, cast, TypeVar, Union, Type +from typing import Any, Dict, Tuple, Optional, Callable, List, cast, Sequence, TypeVar, Union, Type import PIL.Image import torch @@ -29,7 +29,10 @@ def _put_into_sample(sample: Any, id: Tuple[Any, ...], item: Any) -> Any: class _AutoAugmentBase(Transform): def __init__( - self, *, interpolation: InterpolationMode = InterpolationMode.NEAREST, fill: Optional[List[float]] = None + self, + *, + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, ) -> None: super().__init__() self.interpolation = interpolation @@ -66,11 +69,11 @@ def fn( def _parse_fill( self, image: Union[PIL.Image.Image, torch.Tensor, features.Image], num_channels: int - ) -> Optional[List[float]]: + ) -> Union[int, float, Sequence[int], Sequence[float]]: fill = self.fill - if isinstance(image, PIL.Image.Image) or fill is None: - return fill + # if isinstance(image, PIL.Image.Image) or fill is None: + # return fill if isinstance(fill, (int, float)): fill = [float(fill)] * num_channels @@ -85,7 +88,7 @@ def _apply_image_transform( transform_id: str, magnitude: float, interpolation: InterpolationMode, - fill: Optional[List[float]], + fill: Union[int, float, Sequence[int], Sequence[float]], ) -> Any: if transform_id == "Identity": return image @@ -186,7 +189,7 @@ def __init__( self, policy: AutoAugmentPolicy = AutoAugmentPolicy.IMAGENET, interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, ) -> None: super().__init__(interpolation=interpolation, fill=fill) self.policy = policy @@ -348,7 +351,7 @@ def __init__( magnitude: int = 9, num_magnitude_bins: int = 31, interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, ) -> None: super().__init__(interpolation=interpolation, fill=fill) self.num_ops = num_ops @@ -408,7 +411,7 @@ def __init__( *, num_magnitude_bins: int = 31, interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, ): super().__init__(interpolation=interpolation, fill=fill) self.num_magnitude_bins = num_magnitude_bins @@ -467,7 +470,7 @@ def __init__( alpha: float = 1.0, all_ops: bool = True, interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: 
Union[int, float, Sequence[int], Sequence[float]] = 0, ) -> None: super().__init__(interpolation=interpolation, fill=fill) self._PARAMETER_MAX = 10 diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index e0a6adc38ee..dfb20718134 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -9,7 +9,7 @@ from torchvision.prototype import features from torchvision.prototype.transforms import Transform, functional as F from torchvision.transforms.functional import pil_to_tensor, InterpolationMode -from torchvision.transforms.transforms import _setup_size +from torchvision.transforms.transforms import _setup_size, _setup_angle, _check_sequence_input from typing_extensions import Literal from ._transform import _RandomApplyTransform @@ -220,19 +220,23 @@ def apply_recursively(obj: Any) -> Any: return apply_recursively(inputs if len(inputs) > 1 else inputs[0]) +def _check_fill_arg(fill: Union[int, float, Sequence[int], Sequence[float]]) -> None: + if not isinstance(fill, (numbers.Number, tuple, list)): + raise TypeError("Got inappropriate fill arg") + + class Pad(Transform): def __init__( self, padding: Union[int, Sequence[int]], - fill: Union[float, Sequence[float]] = 0.0, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, padding_mode: Literal["constant", "edge", "reflect", "symmetric"] = "constant", ) -> None: super().__init__() if not isinstance(padding, (numbers.Number, tuple, list)): raise TypeError("Got inappropriate padding arg") - if not isinstance(fill, (numbers.Number, tuple, list)): - raise TypeError("Got inappropriate fill arg") + _check_fill_arg(fill) if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") @@ -252,12 +256,11 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomZoomOut(_RandomApplyTransform): def __init__( - self, fill: Union[float, Sequence[float]] = 0.0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + self, fill: Union[int, float, Sequence[int], Sequence[float]] = 0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 ) -> None: super().__init__(p=p) - if fill is None: - fill = 0.0 + _check_fill_arg(fill) self.fill = fill self.side_range = side_range @@ -292,3 +295,41 @@ def forward(self, *inputs: Any) -> Any: self._pad_op.padding = params["padding"] self._pad_op.fill = params["fill"] return self._pad_op(*inputs) + + +class RandomRotation(Transform): + def __init__( + self, + degrees: Union[numbers.Number, Sequence], + interpolation: InterpolationMode = InterpolationMode.NEAREST, + expand: bool = False, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + center: Optional[List[float]] = None, + ) -> None: + super().__init__() + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) + self.interpolation = interpolation + self.expand = expand + + _check_fill_arg(fill) + + self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2,)) + + self.center = center + + def _get_params(self, sample: Any) -> Dict[str, Any]: + angle = float(torch.empty(1).uniform_(float(self.degrees[0]), float(self.degrees[1])).item()) + return dict(angle=angle) + + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.rotate( + inpt, + **params, + interpolation=self.interpolation, + expand=self.expand, + fill=self.fill, + center=self.center, 
+ ) \ No newline at end of file diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 65f24c5055b..7d274f13420 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -231,7 +231,7 @@ def affine_image_pil( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> PIL.Image.Image: angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center) @@ -369,7 +369,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> DType: if isinstance(inpt, features._Feature): @@ -387,6 +387,15 @@ def affine( fill=fill, center=center, ) + + # This cast does Sequence -> List[float] to please mypy and torch.jit.script + if not isinstance(fill, (int, float)): + fill = [float(v) for v in list(fill)] + + if isinstance(fill, (int, float)): + # It is OK to cast int to float as later we use inpt.dtype + fill = [float(fill)] + return affine_image_tensor( inpt, angle, @@ -427,7 +436,7 @@ def rotate_image_pil( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> PIL.Image.Image: if center is not None and expand: @@ -483,13 +492,22 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Optional[List[float]] = None, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, center: Optional[List[float]] = None, ) -> DType: if isinstance(inpt, features._Feature): return inpt.rotate(angle, interpolation=interpolation, expand=expand, fill=fill, center=center) if isinstance(inpt, PIL.Image.Image): return rotate_image_pil(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center) + + # This cast does Sequence -> List[float] to please mypy and torch.jit.script + if not isinstance(fill, (int, float)): + fill = [float(v) for v in list(fill)] + + if isinstance(fill, (int, float)): + # It is OK to cast int to float as later we use inpt.dtype + fill = [float(fill)] + return rotate_image_tensor(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center) diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py index 54bf926762a..75b437b54e3 100644 --- a/torchvision/transforms/functional_pil.py +++ b/torchvision/transforms/functional_pil.py @@ -1,5 +1,5 @@ import numbers -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Dict, List, Optional, Sequence, Tuple, Union import numpy as np import torch @@ -304,7 +304,7 @@ def rotate( interpolation: int = _pil_constants.NEAREST, expand: bool = False, center: Optional[Tuple[int, int]] = None, - fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, ) -> Image.Image: if not _is_pil_image(img): From a3c6106703df91f9c858d963afb73d4fa74e7cfa Mon Sep 17 00:00:00 2001 From: 
vfdev-5 Date: Thu, 7 Jul 2022 17:35:02 +0200 Subject: [PATCH 04/14] code formatting and type hints for affine transformation --- test/test_transforms_tensor.py | 60 ++----------- torchvision/prototype/transforms/_augment.py | 1 + torchvision/prototype/transforms/_geometry.py | 90 ++++++++++++++++++- .../transforms/functional/_geometry.py | 2 +- torchvision/transforms/functional_tensor.py | 4 +- 5 files changed, 98 insertions(+), 59 deletions(-) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index 73563bccd18..5e78b241c7b 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -291,11 +291,7 @@ def test_center_crop(device, tmpdir): scripted_fn(tensor) # Test torchscript of transforms.CenterCrop with size as [int, ] - f = T.CenterCrop( - size=[ - 5, - ] - ) + f = T.CenterCrop(size=[5]) scripted_fn = torch.jit.script(f) scripted_fn(tensor) @@ -317,17 +313,7 @@ def test_center_crop(device, tmpdir): (F.ten_crop, T.TenCrop, 10), ], ) -@pytest.mark.parametrize( - "size", - [ - (5,), - [ - 5, - ], - (4, 5), - [4, 5], - ], -) +@pytest.mark.parametrize("size", [(5,), [5], (4, 5), [4, 5]]) def test_x_crop(fn, method, out_length, size, device): meth_kwargs = fn_kwargs = {"size": size} scripted_fn = torch.jit.script(fn) @@ -509,19 +495,7 @@ def test_random_affine_degrees(device, interpolation, degrees): @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) -@pytest.mark.parametrize( - "fill", - [ - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_random_affine_fill(device, interpolation, fill): _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, fill=fill) @@ -531,19 +505,7 @@ def test_random_affine_fill(device, interpolation, fill): @pytest.mark.parametrize("expand", [True, False]) @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]]) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) -@pytest.mark.parametrize( - "fill", - [ - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_random_rotate(device, center, expand, degrees, interpolation, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) @@ -564,19 +526,7 @@ def test_random_rotate_save(tmpdir): @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("distortion_scale", np.linspace(0.1, 1.0, num=20)) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) -@pytest.mark.parametrize( - "fill", - [ - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_random_perspective(device, distortion_scale, interpolation, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py index 143721f7ee0..a09309a4d6f 100644 --- a/torchvision/prototype/transforms/_augment.py +++ b/torchvision/prototype/transforms/_augment.py @@ -100,6 +100,7 @@ def _transform(self, inpt: Any, 
params: Dict[str, Any]) -> Any: "RandomErasing transformation does not support bounding boxes, segmentation masks and plain labels" ) + class _BaseMixupCutmix(Transform): def __init__(self, *, alpha: float) -> None: super().__init__() diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index dfb20718134..da44d261b92 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -256,7 +256,10 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: class RandomZoomOut(_RandomApplyTransform): def __init__( - self, fill: Union[int, float, Sequence[int], Sequence[float]] = 0, side_range: Tuple[float, float] = (1.0, 4.0), p: float = 0.5 + self, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + side_range: Tuple[float, float] = (1.0, 4.0), + p: float = 0.5, ) -> None: super().__init__(p=p) @@ -332,4 +335,87 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: expand=self.expand, fill=self.fill, center=self.center, - ) \ No newline at end of file + ) + + +class RandomAffine(Transform): + def __init__( + self, + degrees: Union[numbers.Number, Sequence], + translate: Optional[Sequence[float]] = None, + scale: Optional[Sequence[float]] = None, + shear: Optional[Union[float, Sequence[float]]] = None, + interpolation: InterpolationMode = InterpolationMode.NEAREST, + fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + center: Optional[List[float]] = None, + ) -> None: + super().__init__() + self.degrees = _setup_angle(degrees, name="degrees", req_sizes=(2,)) + if translate is not None: + _check_sequence_input(translate, "translate", req_sizes=(2,)) + for t in translate: + if not (0.0 <= t <= 1.0): + raise ValueError("translation values should be between 0 and 1") + self.translate = translate + if scale is not None: + _check_sequence_input(scale, "scale", req_sizes=(2,)) + for s in scale: + if s <= 0: + raise ValueError("scale values should be positive") + self.scale = scale + + if shear is not None: + self.shear = _setup_angle(shear, name="shear", req_sizes=(2, 4)) + else: + self.shear = shear + + self.interpolation = interpolation + + _check_fill_arg(fill) + + self.fill = fill + + if center is not None: + _check_sequence_input(center, "center", req_sizes=(2,)) + + self.center = center + + def _get_params(self, sample: Any) -> Dict[str, Any]: + + # Get image size + # TODO: make it work with bboxes and segm masks + image = query_image(sample) + _, height, width = get_image_dimensions(image) + + angle = float(torch.empty(1).uniform_(float(self.degrees[0]), float(self.degrees[1])).item()) + if self.translate is not None: + max_dx = float(self.translate[0] * width) + max_dy = float(self.translate[1] * height) + tx = int(round(torch.empty(1).uniform_(-max_dx, max_dx).item())) + ty = int(round(torch.empty(1).uniform_(-max_dy, max_dy).item())) + translations = (tx, ty) + else: + translations = (0, 0) + + if self.scale is not None: + scale = float(torch.empty(1).uniform_(self.scale[0], self.scale[1]).item()) + else: + scale = 1.0 + + shear_x = shear_y = 0.0 + if self.shear is not None: + shear_x = float(torch.empty(1).uniform_(self.shear[0], self.shear[1]).item()) + if len(self.shear) == 4: + shear_y = float(torch.empty(1).uniform_(self.shear[2], self.shear[3]).item()) + + shear = (shear_x, shear_y) + return dict(angle=angle, translations=translations, scale=scale, shear=shear) + + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return 
F.affine( + inpt, + **params, + interpolation=self.interpolation, + fill=self.fill, + center=self.center, + ) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 7d274f13420..079cd8cb654 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -587,7 +587,7 @@ def pad( inpt: DType, padding: Union[int, Sequence[int]], fill: Union[int, float, Sequence[int], Sequence[float]] = 0, - padding_mode: str = "constant" + padding_mode: str = "constant", ) -> DType: if isinstance(inpt, features._Feature): return inpt.pad(padding, fill=fill, padding_mode=padding_mode) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index 5953fe50dcd..ba0a9d462e0 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -370,7 +370,9 @@ def _parse_pad_padding(padding: Union[int, List[int]]) -> List[int]: return [pad_left, pad_right, pad_top, pad_bottom] -def pad(img: Tensor, padding: Union[int, List[int]], fill: Union[int, float] = 0, padding_mode: str = "constant") -> Tensor: +def pad( + img: Tensor, padding: Union[int, List[int]], fill: Union[int, float] = 0, padding_mode: str = "constant" +) -> Tensor: _assert_image_tensor(img) if not isinstance(padding, (int, tuple, list)): From fbda892430f7507698dc726ebfb46c6afbaca4d1 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Thu, 7 Jul 2022 17:40:48 +0200 Subject: [PATCH 05/14] Fixed flake8 --- torchvision/prototype/features/_label.py | 2 +- torchvision/prototype/features/_segmentation_mask.py | 3 +-- torchvision/prototype/transforms/_auto_augment.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/torchvision/prototype/features/_label.py b/torchvision/prototype/features/_label.py index 38e32f19057..e3433b7bb08 100644 --- a/torchvision/prototype/features/_label.py +++ b/torchvision/prototype/features/_label.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Optional, Sequence, cast, Union, Tuple +from typing import Any, Optional, Sequence, cast, Union import torch from torchvision.prototype.utils._internal import apply_recursively diff --git a/torchvision/prototype/features/_segmentation_mask.py b/torchvision/prototype/features/_segmentation_mask.py index 765f48be63e..8ca5159e000 100644 --- a/torchvision/prototype/features/_segmentation_mask.py +++ b/torchvision/prototype/features/_segmentation_mask.py @@ -1,8 +1,7 @@ from __future__ import annotations -from typing import Tuple, List, Optional, Union, Sequence +from typing import List, Optional, Union, Sequence -import torch from torchvision.transforms import InterpolationMode from ._feature import _Feature diff --git a/torchvision/prototype/transforms/_auto_augment.py b/torchvision/prototype/transforms/_auto_augment.py index 8508f3549f0..45a8e4817bf 100644 --- a/torchvision/prototype/transforms/_auto_augment.py +++ b/torchvision/prototype/transforms/_auto_augment.py @@ -9,7 +9,7 @@ from torchvision.transforms.autoaugment import AutoAugmentPolicy from torchvision.transforms.functional import pil_to_tensor, to_pil_image, InterpolationMode -from ._utils import get_image_dimensions, is_simple_tensor +from ._utils import get_image_dimensions K = TypeVar("K") V = TypeVar("V") From e58fafd8e4cfc1042832f427000165198b084851 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 10:56:15 +0200 Subject: [PATCH 06/14] 
Updated tests to save and load transforms --- test/test_transforms_tensor.py | 255 ++++++++------------------------- 1 file changed, 62 insertions(+), 193 deletions(-) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index 73563bccd18..e37d08fa113 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -93,6 +93,15 @@ def _test_op(func, method, device, channels=3, fn_kwargs=None, meth_kwargs=None, _test_class_op(method, device, channels, meth_kwargs, test_exact_match=test_exact_match, **match_kwargs) +def _test_fn_save_load(fn, tmpdir): + scripted_fn = torch.jit.script(fn) + p = os.path.join( + tmpdir, f"t_op_list_{fn.__name__ if hasattr(fn, '__name__') else fn.__class__.__name__}.pt" + ) + scripted_fn.save(p) + _ = torch.jit.load(p) + + @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize( "func,method,fn_kwargs,match_kwargs", @@ -204,9 +213,7 @@ def test_pad(m, mul, device): _test_functional_op(F.pad, fn_kwargs={"padding": mul * 2, "fill": fill, "padding_mode": m}, device=device) # Test functional.pad and transforms.Pad with padding as [int, ] fn_kwargs = meth_kwargs = { - "padding": [ - mul * 2, - ], + "padding": [mul * 2], "fill": fill, "padding_mode": m, } @@ -253,33 +260,30 @@ def test_crop(device): [ {"padding_mode": "constant", "fill": 0}, {"padding_mode": "constant", "fill": 10}, - {"padding_mode": "constant", "fill": 20}, {"padding_mode": "edge"}, {"padding_mode": "reflect"}, ], ) -@pytest.mark.parametrize( - "size", - [ - 5, - [ - 5, - ], - [6, 6], - ], -) -def test_crop_pad(size, padding_config, device): +@pytest.mark.parametrize("pad_if_needed", [True, False]) +@pytest.mark.parametrize("padding", [[5], [5, 4], [1, 2, 3, 4]]) +@pytest.mark.parametrize("size", [5, [5], [6, 6]]) +def test_random_crop(size, padding, pad_if_needed, padding_config, device): config = dict(padding_config) config["size"] = size + config["padding"] = padding + config["pad_if_needed"] = pad_if_needed _test_class_op(T.RandomCrop, device, meth_kwargs=config) +def test_random_crop_save_load(tmpdir): + fn = T.RandomCrop(32, [4], pad_if_needed=True) + _test_fn_save_load(fn, tmpdir) + + @pytest.mark.parametrize("device", cpu_and_gpu()) def test_center_crop(device, tmpdir): fn_kwargs = {"output_size": (4, 5)} - meth_kwargs = { - "size": (4, 5), - } + meth_kwargs = {"size": (4, 5)} _test_op(F.center_crop, T.CenterCrop, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs) fn_kwargs = {"output_size": (5,)} meth_kwargs = {"size": (5,)} @@ -291,11 +295,7 @@ def test_center_crop(device, tmpdir): scripted_fn(tensor) # Test torchscript of transforms.CenterCrop with size as [int, ] - f = T.CenterCrop( - size=[ - 5, - ] - ) + f = T.CenterCrop(size=[5]) scripted_fn = torch.jit.script(f) scripted_fn(tensor) @@ -304,7 +304,10 @@ def test_center_crop(device, tmpdir): scripted_fn = torch.jit.script(f) scripted_fn(tensor) - scripted_fn.save(os.path.join(tmpdir, "t_center_crop.pt")) + +def test_center_crop_save_load(tmpdir): + fn = T.CenterCrop(size=[5]) + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("device", cpu_and_gpu()) @@ -317,17 +320,7 @@ def test_center_crop(device, tmpdir): (F.ten_crop, T.TenCrop, 10), ], ) -@pytest.mark.parametrize( - "size", - [ - (5,), - [ - 5, - ], - (4, 5), - [4, 5], - ], -) +@pytest.mark.parametrize("size", [(5,), [5], (4, 5), [4, 5]]) def test_x_crop(fn, method, out_length, size, device): meth_kwargs = fn_kwargs = {"size": size} scripted_fn = torch.jit.script(fn) @@ -366,14 +359,9 @@ def 
test_x_crop(fn, method, out_length, size, device): @pytest.mark.parametrize("method", ["FiveCrop", "TenCrop"]) -def test_x_crop_save(method, tmpdir): - fn = getattr(T, method)( - size=[ - 5, - ] - ) - scripted_fn = torch.jit.script(fn) - scripted_fn.save(os.path.join(tmpdir, f"t_op_list_{method}.pt")) +def test_x_crop_save_load(method, tmpdir): + fn = getattr(T, method)(size=[5]) + _test_fn_save_load(fn, tmpdir) class TestResize: @@ -391,15 +379,7 @@ def test_resize_int(self, size): @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64]) - @pytest.mark.parametrize( - "size", - [ - [32], - [32, 32], - (32, 32), - [34, 35], - ], - ) + @pytest.mark.parametrize("size", [[32], [32, 32], (32, 32), [34, 35]]) @pytest.mark.parametrize("max_size", [None, 35, 1000]) @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST]) def test_resize_scripted(self, dt, size, max_size, interpolation, device): @@ -417,25 +397,14 @@ def test_resize_scripted(self, dt, size, max_size, interpolation, device): _test_transform_vs_scripted(transform, s_transform, tensor) _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) - def test_resize_save(self, tmpdir): - transform = T.Resize(size=[32]) - s_transform = torch.jit.script(transform) - s_transform.save(os.path.join(tmpdir, "t_resize.pt")) + def test_resize_save_load(self, tmpdir): + fn = T.Resize(size=[32]) + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]]) @pytest.mark.parametrize("ratio", [(0.75, 1.333), [0.75, 1.333]]) - @pytest.mark.parametrize( - "size", - [ - (32,), - [44], - [32], - [32, 32], - (32, 32), - [44, 55], - ], - ) + @pytest.mark.parametrize("size", [(32,), [44], [32], [32, 32], (32, 32), [44, 55]]) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC]) @pytest.mark.parametrize("antialias", [None, True, False]) def test_resized_crop(self, scale, ratio, size, interpolation, antialias, device): @@ -452,14 +421,9 @@ def test_resized_crop(self, scale, ratio, size, interpolation, antialias, device _test_transform_vs_scripted(transform, s_transform, tensor) _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) - def test_resized_crop_save(self, tmpdir): - transform = T.RandomResizedCrop( - size=[ - 32, - ] - ) - s_transform = torch.jit.script(transform) - s_transform.save(os.path.join(tmpdir, "t_resized_crop.pt")) + def test_resized_crop_save_load(self, tmpdir): + fn = T.RandomResizedCrop(size=[32]) + _test_fn_save_load(fn, tmpdir) def _test_random_affine_helper(device, **kwargs): @@ -472,11 +436,9 @@ def _test_random_affine_helper(device, **kwargs): _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) -@pytest.mark.parametrize("device", cpu_and_gpu()) -def test_random_affine(device, tmpdir): - transform = T.RandomAffine(degrees=45.0) - s_transform = torch.jit.script(transform) - s_transform.save(os.path.join(tmpdir, "t_random_affine.pt")) +def test_random_affine_save_load(tmpdir): + fn = T.RandomAffine(degrees=45.0) + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("device", cpu_and_gpu()) @@ -509,19 +471,7 @@ def test_random_affine_degrees(device, interpolation, degrees): @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) -@pytest.mark.parametrize( - "fill", - [ - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) 
+@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_random_affine_fill(device, interpolation, fill): _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, fill=fill) @@ -531,19 +481,7 @@ def test_random_affine_fill(device, interpolation, fill): @pytest.mark.parametrize("expand", [True, False]) @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]]) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) -@pytest.mark.parametrize( - "fill", - [ - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_random_rotate(device, center, expand, degrees, interpolation, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) @@ -555,28 +493,15 @@ def test_random_rotate(device, center, expand, degrees, interpolation, fill): _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) -def test_random_rotate_save(tmpdir): - transform = T.RandomRotation(degrees=45.0) - s_transform = torch.jit.script(transform) - s_transform.save(os.path.join(tmpdir, "t_random_rotate.pt")) +def test_random_rotate_save_load(tmpdir): + fn = T.RandomRotation(degrees=45.0) + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("distortion_scale", np.linspace(0.1, 1.0, num=20)) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR]) -@pytest.mark.parametrize( - "fill", - [ - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_random_perspective(device, distortion_scale, interpolation, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) @@ -588,10 +513,9 @@ def test_random_perspective(device, distortion_scale, interpolation, fill): _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors) -def test_random_perspective_save(tmpdir): - transform = T.RandomPerspective() - s_transform = torch.jit.script(transform) - s_transform.save(os.path.join(tmpdir, "t_perspective.pt")) +def test_random_perspective_save_load(tmpdir): + fn = T.RandomPerspective() + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("device", cpu_and_gpu()) @@ -630,28 +554,14 @@ def test_convert_image_dtype(device, in_dtype, out_dtype): _test_transform_vs_scripted_on_batch(fn, scripted_fn, in_batch_tensors) -def test_convert_image_dtype_save(tmpdir): +def test_convert_image_dtype_save_load(tmpdir): fn = T.ConvertImageDtype(dtype=torch.uint8) - scripted_fn = torch.jit.script(fn) - scripted_fn.save(os.path.join(tmpdir, "t_convert_dtype.pt")) + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("policy", [policy for policy in T.AutoAugmentPolicy]) -@pytest.mark.parametrize( - "fill", - [ - None, - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_autoaugment(device, policy, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), 
dtype=torch.uint8, device=device) @@ -666,20 +576,7 @@ def test_autoaugment(device, policy, fill): @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize("num_ops", [1, 2, 3]) @pytest.mark.parametrize("magnitude", [7, 9, 11]) -@pytest.mark.parametrize( - "fill", - [ - None, - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_randaugment(device, num_ops, magnitude, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) @@ -692,20 +589,7 @@ def test_randaugment(device, num_ops, magnitude, fill): @pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize( - "fill", - [ - None, - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_trivialaugmentwide(device, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) @@ -718,20 +602,7 @@ def test_trivialaugmentwide(device, fill): @pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize( - "fill", - [ - None, - 85, - (10, -10, 10), - 0.7, - [0.0, 0.0, 0.0], - [ - 1, - ], - 1, - ], -) +@pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1]) def test_augmix(device, fill): tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device) batch_tensors = torch.randint(0, 256, size=(4, 3, 44, 56), dtype=torch.uint8, device=device) @@ -749,10 +620,9 @@ def _sample_dirichlet(self, params: torch.Tensor) -> torch.Tensor: @pytest.mark.parametrize("augmentation", [T.AutoAugment, T.RandAugment, T.TrivialAugmentWide, T.AugMix]) -def test_autoaugment_save(augmentation, tmpdir): - transform = augmentation() - s_transform = torch.jit.script(transform) - s_transform.save(os.path.join(tmpdir, "t_autoaugment.pt")) +def test_autoaugment_save_load(augmentation, tmpdir): + fn = augmentation() + _test_fn_save_load(fn, tmpdir) @pytest.mark.parametrize("interpolation", [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR]) @@ -812,10 +682,9 @@ def test_random_erasing(device, config): _test_transform_vs_scripted_on_batch(fn, scripted_fn, batch_tensors) -def test_random_erasing_save(tmpdir): +def test_random_erasing_save_load(tmpdir): fn = T.RandomErasing(value=0.2) - scripted_fn = torch.jit.script(fn) - scripted_fn.save(os.path.join(tmpdir, "t_random_erasing.pt")) + _test_fn_save_load(fn, tmpdir) def test_random_erasing_with_invalid_data(): From 3e426c18f4c6631135991087577269f0e4d044b8 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 11:06:33 +0200 Subject: [PATCH 07/14] Fixed code formatting issue --- test/test_transforms_tensor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py index e37d08fa113..7dc6dbd95d9 100644 --- a/test/test_transforms_tensor.py +++ b/test/test_transforms_tensor.py @@ -95,9 +95,7 @@ def _test_op(func, method, device, channels=3, fn_kwargs=None, meth_kwargs=None, def _test_fn_save_load(fn, tmpdir): scripted_fn = torch.jit.script(fn) - p = os.path.join( - tmpdir, f"t_op_list_{fn.__name__ if hasattr(fn, '__name__') else fn.__class__.__name__}.pt" - ) + p = 
os.path.join(tmpdir, f"t_op_list_{fn.__name__ if hasattr(fn, '__name__') else fn.__class__.__name__}.pt") scripted_fn.save(p) _ = torch.jit.load(p) From 02ec95a28c0b9f958960c871146b75b735488287 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 11:51:07 +0200 Subject: [PATCH 08/14] Fixed jit loading issue --- torchvision/transforms/functional_tensor.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py index ba0a9d462e0..2b0872acf8a 100644 --- a/torchvision/transforms/functional_tensor.py +++ b/torchvision/transforms/functional_tensor.py @@ -385,8 +385,13 @@ def pad( if isinstance(padding, tuple): padding = list(padding) - if isinstance(padding, list) and len(padding) not in [1, 2, 4]: - raise ValueError(f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple") + if isinstance(padding, list): + # TODO: Jit is failing on loading this op when scripted and saved + # https://github.com/pytorch/pytorch/issues/81100 + if len(padding) not in [1, 2, 4]: + raise ValueError( + f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple" + ) if padding_mode not in ["constant", "edge", "reflect", "symmetric"]: raise ValueError("Padding mode should be either constant, edge, reflect or symmetric") From 4a0cec5143db00684c15ec022efa487c908017af Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 16:02:59 +0200 Subject: [PATCH 09/14] Restored fill default value to None Updated code according to the review --- test/test_prototype_transforms.py | 1 + .../prototype/features/_bounding_box.py | 8 +-- torchvision/prototype/features/_feature.py | 8 +-- torchvision/prototype/features/_image.py | 35 ++++++------- .../prototype/features/_segmentation_mask.py | 8 +-- torchvision/prototype/transforms/_augment.py | 17 +++---- torchvision/prototype/transforms/_color.py | 20 +------- torchvision/prototype/transforms/_geometry.py | 9 +--- .../transforms/functional/_geometry.py | 49 +++++++++++-------- torchvision/transforms/functional_pil.py | 6 +-- 10 files changed, 69 insertions(+), 92 deletions(-) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index dc3de480d1f..2f6b808d7e3 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -72,6 +72,7 @@ class TestSmoke: transforms.ConvertImageDtype(), transforms.RandomHorizontalFlip(), transforms.Pad(5), + transforms.RandomZoomOut(), ) def test_common(self, transform, input): transform(input) diff --git a/torchvision/prototype/features/_bounding_box.py b/torchvision/prototype/features/_bounding_box.py index 319e6d80927..eb9d1f6ac3a 100644 --- a/torchvision/prototype/features/_bounding_box.py +++ b/torchvision/prototype/features/_bounding_box.py @@ -130,7 +130,7 @@ def resized_crop( def pad( self, padding: Union[int, Sequence[int]], - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, padding_mode: str = "constant", ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F @@ -160,7 +160,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> BoundingBox: 
from torchvision.prototype.transforms import functional as _F @@ -180,7 +180,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F @@ -201,7 +201,7 @@ def perspective( self, perspective_coeffs: List[float], interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> BoundingBox: from torchvision.prototype.transforms import functional as _F diff --git a/torchvision/prototype/features/_feature.py b/torchvision/prototype/features/_feature.py index e6c4e55fd6c..775f09f2f4b 100644 --- a/torchvision/prototype/features/_feature.py +++ b/torchvision/prototype/features/_feature.py @@ -122,7 +122,7 @@ def resized_crop( def pad( self, padding: Union[int, Sequence[int]], - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, padding_mode: str = "constant", ) -> Any: return self @@ -132,7 +132,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> Any: return self @@ -144,7 +144,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> Any: return self @@ -153,7 +153,7 @@ def perspective( self, perspective_coeffs: List[float], interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Any: return self diff --git a/torchvision/prototype/features/_image.py b/torchvision/prototype/features/_image.py index 62b17140725..447e67b33e9 100644 --- a/torchvision/prototype/features/_image.py +++ b/torchvision/prototype/features/_image.py @@ -166,7 +166,7 @@ def resized_crop( def pad( self, padding: Union[int, Sequence[int]], - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, padding_mode: str = "constant", ) -> Image: from torchvision.prototype.transforms import functional as _F @@ -175,6 +175,9 @@ def pad( if not isinstance(padding, int): padding = list(padding) + if fill is None: + fill = 0 + # PyTorch's pad supports only scalars on fill. 
So we need to overwrite the colour if isinstance(fill, (int, float)): output = _F.pad_image_tensor(self, padding, fill=fill, padding_mode=padding_mode) @@ -190,18 +193,12 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> Image: - from torchvision.prototype.transforms import functional as _F + from torchvision.prototype.transforms.functional import _geometry as _F - # This cast does Sequence -> List[float] to please mypy and torch.jit.script - if not isinstance(fill, (int, float)): - fill = [float(v) for v in list(fill)] - - if isinstance(fill, (int, float)): - # It is OK to cast int to float as later we use inpt.dtype - fill = [float(fill)] + fill = _F._convert_fill_arg(fill) output = _F.rotate_image_tensor( self, angle, interpolation=interpolation, expand=expand, fill=fill, center=center @@ -215,18 +212,12 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> Image: - from torchvision.prototype.transforms import functional as _F + from torchvision.prototype.transforms.functional import _geometry as _F - # This cast does Sequence -> List[float] to please mypy and torch.jit.script - if not isinstance(fill, (int, float)): - fill = [float(v) for v in list(fill)] - - if isinstance(fill, (int, float)): - # It is OK to cast int to float as later we use inpt.dtype - fill = [float(fill)] + fill = _F._convert_fill_arg(fill) output = _F.affine_image_tensor( self, @@ -244,9 +235,11 @@ def perspective( self, perspective_coeffs: List[float], interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image: - from torchvision.prototype.transforms import functional as _F + from torchvision.prototype.transforms.functional import _geometry as _F + + fill = _F._convert_fill_arg(fill) output = _F.perspective_image_tensor(self, perspective_coeffs, interpolation=interpolation, fill=fill) return Image.new_like(self, output) diff --git a/torchvision/prototype/features/_segmentation_mask.py b/torchvision/prototype/features/_segmentation_mask.py index 8ca5159e000..f894f33d1b2 100644 --- a/torchvision/prototype/features/_segmentation_mask.py +++ b/torchvision/prototype/features/_segmentation_mask.py @@ -62,7 +62,7 @@ def resized_crop( def pad( self, padding: Union[int, Sequence[int]], - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, padding_mode: str = "constant", ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F @@ -79,7 +79,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F @@ -94,7 +94,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = 
InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F @@ -113,7 +113,7 @@ def perspective( self, perspective_coeffs: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Optional[List[float]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> SegmentationMask: from torchvision.prototype.transforms import functional as _F diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py index a09309a4d6f..e3058f35551 100644 --- a/torchvision/prototype/transforms/_augment.py +++ b/torchvision/prototype/transforms/_augment.py @@ -113,13 +113,12 @@ def forward(self, *inpts: Any) -> Any: raise TypeError(f"{type(self).__name__}() is only defined for Image's *and* OneHotLabel's.") return super().forward(sample) - -def _mixup_onehotlabel(inpt: features.OneHotLabel, lam: float) -> features.OneHotLabel: - if inpt.ndim < 2: - raise ValueError("Need a batch of one hot labels") - output = inpt.clone() - output = output.roll(1, -2).mul_(1 - lam).add_(output.mul_(lam)) - return features.OneHotLabel.new_like(inpt, output) + def _mixup_onehotlabel(self, inpt: features.OneHotLabel, lam: float) -> features.OneHotLabel: + if inpt.ndim < 2: + raise ValueError("Need a batch of one hot labels") + output = inpt.clone() + output = output.roll(1, -2).mul_(1 - lam).add_(output.mul_(lam)) + return features.OneHotLabel.new_like(inpt, output) class RandomMixup(_BaseMixupCutmix): @@ -135,7 +134,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: output = output.roll(1, -4).mul_(1 - lam).add_(output.mul_(lam)) return features.Image.new_like(inpt, output) if isinstance(inpt, features.OneHotLabel): - return _mixup_onehotlabel(inpt, lam) + return self._mixup_onehotlabel(inpt, lam) raise TypeError( "RandomMixup transformation does not support bounding boxes, segmentation masks and plain labels" @@ -178,7 +177,7 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: return features.Image.new_like(inpt, output) if isinstance(inpt, features.OneHotLabel): lam_adjusted = params["lam_adjusted"] - return _mixup_onehotlabel(inpt, lam_adjusted) + return self._mixup_onehotlabel(inpt, lam_adjusted) raise TypeError( "RandomCutmix transformation does not support bounding boxes, segmentation masks and plain labels" diff --git a/torchvision/prototype/transforms/_color.py b/torchvision/prototype/transforms/_color.py index 60fe46ed9ea..85e22aaeb1a 100644 --- a/torchvision/prototype/transforms/_color.py +++ b/torchvision/prototype/transforms/_color.py @@ -1,5 +1,5 @@ import collections.abc -from typing import Any, Dict, Union, Tuple, Optional, Sequence, Callable, TypeVar +from typing import Any, Dict, Union, Tuple, Optional, Sequence, TypeVar import PIL.Image import torch @@ -52,24 +52,6 @@ def _check_input( return None if value[0] == value[1] == center else (float(value[0]), float(value[1])) - def _image_transform( - self, - inpt: T, - *, - kernel_tensor: Callable[..., torch.Tensor], - kernel_pil: Callable[..., PIL.Image.Image], - **kwargs: Any, - ) -> T: - if isinstance(inpt, features.Image): - output = kernel_tensor(inpt, **kwargs) - return features.Image.new_like(inpt, output) - elif is_simple_tensor(inpt): - return kernel_tensor(inpt, **kwargs) - elif isinstance(inpt, 
PIL.Image.Image): - return kernel_pil(inpt, **kwargs) # type: ignore[no-any-return] - else: - raise RuntimeError - @staticmethod def _generate_value(left: float, right: float) -> float: return float(torch.distributions.Uniform(left, right).sample()) diff --git a/torchvision/prototype/transforms/_geometry.py b/torchvision/prototype/transforms/_geometry.py index da44d261b92..d4162b2b631 100644 --- a/torchvision/prototype/transforms/_geometry.py +++ b/torchvision/prototype/transforms/_geometry.py @@ -270,8 +270,6 @@ def __init__( if side_range[0] < 1.0 or side_range[0] > side_range[1]: raise ValueError(f"Invalid canvas side range provided {side_range}.") - self._pad_op = Pad(0, padding_mode="constant") - def _get_params(self, sample: Any) -> Dict[str, Any]: image = query_image(sample) orig_c, orig_h, orig_w = get_image_dimensions(image) @@ -293,11 +291,8 @@ def _get_params(self, sample: Any) -> Dict[str, Any]: return dict(padding=padding, fill=fill) - def forward(self, *inputs: Any) -> Any: - params = self._get_params(inputs) - self._pad_op.padding = params["padding"] - self._pad_op.fill = params["fill"] - return self._pad_op(*inputs) + def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: + return F.pad(inpt, **params) class RandomRotation(Transform): diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 079cd8cb654..7b651210030 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -231,7 +231,7 @@ def affine_image_pil( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> PIL.Image.Image: angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center) @@ -362,6 +362,19 @@ def affine_segmentation_mask( ) +def _convert_fill_arg(fill: Optional[Union[int, float, Sequence[int], Sequence[float]]]) -> Optional[List[float]]: + if fill is None: + fill = 0 + + # This cast does Sequence -> List[float] to please mypy and torch.jit.script + if not isinstance(fill, (int, float)): + fill = [float(v) for v in list(fill)] + else: + # It is OK to cast int to float as later we use inpt.dtype + fill = [float(fill)] + return fill + + def affine( inpt: DType, angle: float, @@ -369,7 +382,7 @@ def affine( scale: float, shear: List[float], interpolation: InterpolationMode = InterpolationMode.NEAREST, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> DType: if isinstance(inpt, features._Feature): @@ -388,13 +401,7 @@ def affine( center=center, ) - # This cast does Sequence -> List[float] to please mypy and torch.jit.script - if not isinstance(fill, (int, float)): - fill = [float(v) for v in list(fill)] - - if isinstance(fill, (int, float)): - # It is OK to cast int to float as later we use inpt.dtype - fill = [float(fill)] + fill = _convert_fill_arg(fill) return affine_image_tensor( inpt, @@ -436,7 +443,7 @@ def rotate_image_pil( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], 
Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> PIL.Image.Image: if center is not None and expand: @@ -492,7 +499,7 @@ def rotate( angle: float, interpolation: InterpolationMode = InterpolationMode.NEAREST, expand: bool = False, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, center: Optional[List[float]] = None, ) -> DType: if isinstance(inpt, features._Feature): @@ -500,13 +507,7 @@ def rotate( if isinstance(inpt, PIL.Image.Image): return rotate_image_pil(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center) - # This cast does Sequence -> List[float] to please mypy and torch.jit.script - if not isinstance(fill, (int, float)): - fill = [float(v) for v in list(fill)] - - if isinstance(fill, (int, float)): - # It is OK to cast int to float as later we use inpt.dtype - fill = [float(fill)] + fill = _convert_fill_arg(fill) return rotate_image_tensor(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center) @@ -586,7 +587,7 @@ def pad_bounding_box( def pad( inpt: DType, padding: Union[int, Sequence[int]], - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, padding_mode: str = "constant", ) -> DType: if isinstance(inpt, features._Feature): @@ -598,6 +599,9 @@ def pad( if not isinstance(padding, int): padding = list(padding) + if fill is None: + fill = 0 + # TODO: PyTorch's pad supports only scalars on fill. So we need to overwrite the colour if isinstance(fill, (int, float)): return pad_image_tensor(inpt, padding, fill=fill, padding_mode=padding_mode) @@ -653,7 +657,7 @@ def perspective_image_pil( img: PIL.Image.Image, perspective_coeffs: List[float], interpolation: InterpolationMode = InterpolationMode.BICUBIC, - fill: Optional[List[float]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> PIL.Image.Image: return _FP.perspective(img, perspective_coeffs, interpolation=pil_modes_mapping[interpolation], fill=fill) @@ -751,12 +755,15 @@ def perspective( inpt: DType, perspective_coeffs: List[float], interpolation: InterpolationMode = InterpolationMode.BILINEAR, - fill: Optional[List[float]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> DType: if isinstance(inpt, features._Feature): return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill) if isinstance(inpt, PIL.Image.Image): return perspective_image_pil(inpt, perspective_coeffs, interpolation=interpolation, fill=fill) + + fill = _convert_fill_arg(fill) + return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill) diff --git a/torchvision/transforms/functional_pil.py b/torchvision/transforms/functional_pil.py index 75b437b54e3..768176e6783 100644 --- a/torchvision/transforms/functional_pil.py +++ b/torchvision/transforms/functional_pil.py @@ -286,7 +286,7 @@ def affine( img: Image.Image, matrix: List[float], interpolation: int = _pil_constants.NEAREST, - fill: Optional[Union[float, List[float], Tuple[float, ...]]] = 0, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image.Image: if not _is_pil_image(img): @@ -304,7 +304,7 @@ def rotate( interpolation: int = _pil_constants.NEAREST, expand: bool = False, center: Optional[Tuple[int, int]] = None, - fill: Union[int, float, Sequence[int], Sequence[float]] = 0, + fill: 
Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image.Image: if not _is_pil_image(img): @@ -319,7 +319,7 @@ def perspective( img: Image.Image, perspective_coeffs: List[float], interpolation: int = _pil_constants.BICUBIC, - fill: Optional[Union[float, List[float], Tuple[float, ...]]] = None, + fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None, ) -> Image.Image: if not _is_pil_image(img): From a7d17ecf72519e338e20192d6ab4ffe265e96f74 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 16:38:56 +0200 Subject: [PATCH 10/14] Added tests for rotation, affine and zoom transforms --- test/test_prototype_transforms.py | 2 ++ torchvision/prototype/transforms/__init__.py | 2 ++ .../transforms/functional/_geometry.py | 25 +++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index 2f6b808d7e3..2c8f65e3086 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -73,6 +73,8 @@ class TestSmoke: transforms.RandomHorizontalFlip(), transforms.Pad(5), transforms.RandomZoomOut(), + transforms.RandomRotation(degrees=(-45, 45)), + transforms.RandomAffine(degrees=(-45, 45)), ) def test_common(self, transform, input): transform(input) diff --git a/torchvision/prototype/transforms/__init__.py b/torchvision/prototype/transforms/__init__.py index 5edd18890a8..2075ea7c52b 100644 --- a/torchvision/prototype/transforms/__init__.py +++ b/torchvision/prototype/transforms/__init__.py @@ -17,6 +17,8 @@ RandomVerticalFlip, Pad, RandomZoomOut, + RandomRotation, + RandomAffine, ) from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace from ._misc import Identity, Normalize, ToDtype, Lambda diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 7b651210030..1cd88b9120d 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -210,18 +210,22 @@ def affine_image_tensor( fill: Optional[List[float]] = None, center: Optional[List[float]] = None, ) -> torch.Tensor: + num_channels, height, width = img.shape[-3:] + extra_dims = img.shape[:-3] + img = img.view(-1, num_channels, height, width) + angle, translate, shear, center = _affine_parse_args(angle, translate, scale, shear, interpolation, center) center_f = [0.0, 0.0] if center is not None: - _, height, width = get_dimensions_image_tensor(img) # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center. 
center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])] translate_f = [1.0 * t for t in translate] matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear) - return _FT.affine(img, matrix, interpolation=interpolation.value, fill=fill) + output = _FT.affine(img, matrix, interpolation=interpolation.value, fill=fill) + return output.view(extra_dims + (num_channels, height, width)) def affine_image_pil( @@ -344,7 +348,7 @@ def affine_bounding_box( def affine_segmentation_mask( - img: torch.Tensor, + mask: torch.Tensor, angle: float, translate: List[float], scale: float, @@ -352,7 +356,7 @@ def affine_segmentation_mask( center: Optional[List[float]] = None, ) -> torch.Tensor: return affine_image_tensor( - img, + mask, angle=angle, translate=translate, scale=scale, @@ -423,6 +427,10 @@ def rotate_image_tensor( fill: Optional[List[float]] = None, center: Optional[List[float]] = None, ) -> torch.Tensor: + num_channels, height, width = img.shape[-3:] + extra_dims = img.shape[:-3] + img = img.view(-1, num_channels, height, width) + center_f = [0.0, 0.0] if center is not None: if expand: @@ -435,7 +443,8 @@ def rotate_image_tensor( # due to current incoherence of rotation angle direction between affine and rotate implementations # we need to set -angle. matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0]) - return _FT.rotate(img, matrix, interpolation=interpolation.value, expand=expand, fill=fill) + output = _FT.rotate(img, matrix, interpolation=interpolation.value, expand=expand, fill=fill) + return output.view(extra_dims + (num_channels, height, width)) def rotate_image_pil( @@ -518,15 +527,15 @@ def rotate( def pad_image_tensor( img: torch.Tensor, padding: Union[int, List[int]], fill: Union[int, float] = 0, padding_mode: str = "constant" ) -> torch.Tensor: - num_masks, height, width = img.shape[-3:] + num_channels, height, width = img.shape[-3:] extra_dims = img.shape[:-3] padded_image = _FT.pad( - img=img.view(-1, num_masks, height, width), padding=padding, fill=fill, padding_mode=padding_mode + img=img.view(-1, num_channels, height, width), padding=padding, fill=fill, padding_mode=padding_mode ) new_height, new_width = padded_image.shape[-2:] - return padded_image.view(extra_dims + (num_masks, new_height, new_width)) + return padded_image.view(extra_dims + (num_channels, new_height, new_width)) # TODO: This should be removed once pytorch pad supports non-scalar padding values From 014b8c711cd6910bdd8ca688eb8ee2e534145505 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 16:43:40 +0200 Subject: [PATCH 11/14] Put back commented code --- torchvision/prototype/transforms/_auto_augment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchvision/prototype/transforms/_auto_augment.py b/torchvision/prototype/transforms/_auto_augment.py index 45a8e4817bf..03aa96e08fb 100644 --- a/torchvision/prototype/transforms/_auto_augment.py +++ b/torchvision/prototype/transforms/_auto_augment.py @@ -72,8 +72,8 @@ def _parse_fill( ) -> Union[int, float, Sequence[int], Sequence[float]]: fill = self.fill - # if isinstance(image, PIL.Image.Image) or fill is None: - # return fill + if isinstance(image, PIL.Image.Image) or fill is None: + return fill if isinstance(fill, (int, float)): fill = [float(fill)] * num_channels From 42b4bf309cf4115357f5b1876b305aa5b2b90507 Mon Sep 17 00:00:00 2001 From: vfdev-5 Date: Fri, 8 Jul 2022 17:33:34 +0200 Subject: [PATCH 12/14] Random erase bypass boxes and masks Go back with 
if-return/elif-return/else-return --- torchvision/prototype/transforms/_augment.py | 23 ++-- .../prototype/transforms/functional/_color.py | 55 +++++---- .../transforms/functional/_geometry.py | 112 +++++++++--------- 3 files changed, 100 insertions(+), 90 deletions(-) diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py index e3058f35551..9a2c2c0f416 100644 --- a/torchvision/prototype/transforms/_augment.py +++ b/torchvision/prototype/transforms/_augment.py @@ -94,11 +94,8 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: elif isinstance(inpt, PIL.Image.Image): # TODO: We should implement a fallback to tensor, like gaussian_blur etc raise RuntimeError("Not implemented") - elif isinstance(inpt, torch.Tensor): - return F.erase_image_tensor(inpt, **params) - raise TypeError( - "RandomErasing transformation does not support bounding boxes, segmentation masks and plain labels" - ) + else: + return inpt class _BaseMixupCutmix(Transform): @@ -133,12 +130,10 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: output = inpt.clone() output = output.roll(1, -4).mul_(1 - lam).add_(output.mul_(lam)) return features.Image.new_like(inpt, output) - if isinstance(inpt, features.OneHotLabel): + elif isinstance(inpt, features.OneHotLabel): return self._mixup_onehotlabel(inpt, lam) - - raise TypeError( - "RandomMixup transformation does not support bounding boxes, segmentation masks and plain labels" - ) + else: + return inpt class RandomCutmix(_BaseMixupCutmix): @@ -175,10 +170,8 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any: output = inpt.clone() output[..., y1:y2, x1:x2] = image_rolled[..., y1:y2, x1:x2] return features.Image.new_like(inpt, output) - if isinstance(inpt, features.OneHotLabel): + elif isinstance(inpt, features.OneHotLabel): lam_adjusted = params["lam_adjusted"] return self._mixup_onehotlabel(inpt, lam_adjusted) - - raise TypeError( - "RandomCutmix transformation does not support bounding boxes, segmentation masks and plain labels" - ) + else: + return inpt diff --git a/torchvision/prototype/transforms/functional/_color.py b/torchvision/prototype/transforms/functional/_color.py index 70b7a8e1dfe..d5c5d305722 100644 --- a/torchvision/prototype/transforms/functional/_color.py +++ b/torchvision/prototype/transforms/functional/_color.py @@ -16,9 +16,10 @@ def adjust_brightness(inpt: DType, brightness_factor: float) -> DType: if isinstance(inpt, features._Feature): return inpt.adjust_brightness(brightness_factor=brightness_factor) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return adjust_brightness_image_pil(inpt, brightness_factor=brightness_factor) - return adjust_brightness_image_tensor(inpt, brightness_factor=brightness_factor) + else: + return adjust_brightness_image_tensor(inpt, brightness_factor=brightness_factor) adjust_saturation_image_tensor = _FT.adjust_saturation @@ -28,9 +29,10 @@ def adjust_brightness(inpt: DType, brightness_factor: float) -> DType: def adjust_saturation(inpt: DType, saturation_factor: float) -> DType: if isinstance(inpt, features._Feature): return inpt.adjust_saturation(saturation_factor=saturation_factor) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return adjust_saturation_image_pil(inpt, saturation_factor=saturation_factor) - return adjust_saturation_image_tensor(inpt, saturation_factor=saturation_factor) + else: + return adjust_saturation_image_tensor(inpt, 
saturation_factor=saturation_factor) adjust_contrast_image_tensor = _FT.adjust_contrast @@ -40,9 +42,10 @@ def adjust_saturation(inpt: DType, saturation_factor: float) -> DType: def adjust_contrast(inpt: DType, contrast_factor: float) -> DType: if isinstance(inpt, features._Feature): return inpt.adjust_contrast(contrast_factor=contrast_factor) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return adjust_contrast_image_pil(inpt, contrast_factor=contrast_factor) - return adjust_contrast_image_tensor(inpt, contrast_factor=contrast_factor) + else: + return adjust_contrast_image_tensor(inpt, contrast_factor=contrast_factor) adjust_sharpness_image_tensor = _FT.adjust_sharpness @@ -52,9 +55,10 @@ def adjust_contrast(inpt: DType, contrast_factor: float) -> DType: def adjust_sharpness(inpt: DType, sharpness_factor: float) -> DType: if isinstance(inpt, features._Feature): return inpt.adjust_sharpness(sharpness_factor=sharpness_factor) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return adjust_sharpness_image_pil(inpt, sharpness_factor=sharpness_factor) - return adjust_sharpness_image_tensor(inpt, sharpness_factor=sharpness_factor) + else: + return adjust_sharpness_image_tensor(inpt, sharpness_factor=sharpness_factor) adjust_hue_image_tensor = _FT.adjust_hue @@ -64,9 +68,10 @@ def adjust_sharpness(inpt: DType, sharpness_factor: float) -> DType: def adjust_hue(inpt: DType, hue_factor: float) -> DType: if isinstance(inpt, features._Feature): return inpt.adjust_hue(hue_factor=hue_factor) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return adjust_hue_image_pil(inpt, hue_factor=hue_factor) - return adjust_hue_image_tensor(inpt, hue_factor=hue_factor) + else: + return adjust_hue_image_tensor(inpt, hue_factor=hue_factor) adjust_gamma_image_tensor = _FT.adjust_gamma @@ -76,9 +81,10 @@ def adjust_hue(inpt: DType, hue_factor: float) -> DType: def adjust_gamma(inpt: DType, gamma: float, gain: float = 1) -> DType: if isinstance(inpt, features._Feature): return inpt.adjust_gamma(gamma=gamma, gain=gain) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return adjust_gamma_image_pil(inpt, gamma=gamma, gain=gain) - return adjust_gamma_image_tensor(inpt, gamma=gamma, gain=gain) + else: + return adjust_gamma_image_tensor(inpt, gamma=gamma, gain=gain) posterize_image_tensor = _FT.posterize @@ -88,9 +94,10 @@ def adjust_gamma(inpt: DType, gamma: float, gain: float = 1) -> DType: def posterize(inpt: DType, bits: int) -> DType: if isinstance(inpt, features._Feature): return inpt.posterize(bits=bits) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return posterize_image_pil(inpt, bits=bits) - return posterize_image_tensor(inpt, bits=bits) + else: + return posterize_image_tensor(inpt, bits=bits) solarize_image_tensor = _FT.solarize @@ -100,9 +107,10 @@ def posterize(inpt: DType, bits: int) -> DType: def solarize(inpt: DType, threshold: float) -> DType: if isinstance(inpt, features._Feature): return inpt.solarize(threshold=threshold) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return solarize_image_pil(inpt, threshold=threshold) - return solarize_image_tensor(inpt, threshold=threshold) + else: + return solarize_image_tensor(inpt, threshold=threshold) autocontrast_image_tensor = _FT.autocontrast @@ -112,9 +120,10 @@ def solarize(inpt: DType, threshold: float) -> DType: def autocontrast(inpt: DType) -> DType: if 
isinstance(inpt, features._Feature): return inpt.autocontrast() - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return autocontrast_image_pil(inpt) - return autocontrast_image_tensor(inpt) + else: + return autocontrast_image_tensor(inpt) equalize_image_tensor = _FT.equalize @@ -124,9 +133,10 @@ def autocontrast(inpt: DType) -> DType: def equalize(inpt: DType) -> DType: if isinstance(inpt, features._Feature): return inpt.equalize() - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return equalize_image_pil(inpt) - return equalize_image_tensor(inpt) + else: + return equalize_image_tensor(inpt) invert_image_tensor = _FT.invert @@ -136,6 +146,7 @@ def equalize(inpt: DType) -> DType: def invert(inpt: DType) -> DType: if isinstance(inpt, features._Feature): return inpt.invert() - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return invert_image_pil(inpt) - return invert_image_tensor(inpt) + else: + return invert_image_tensor(inpt) diff --git a/torchvision/prototype/transforms/functional/_geometry.py b/torchvision/prototype/transforms/functional/_geometry.py index 1cd88b9120d..8d3ed675047 100644 --- a/torchvision/prototype/transforms/functional/_geometry.py +++ b/torchvision/prototype/transforms/functional/_geometry.py @@ -47,9 +47,10 @@ def horizontal_flip_bounding_box( def horizontal_flip(inpt: DType) -> DType: if isinstance(inpt, features._Feature): return inpt.horizontal_flip() - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return horizontal_flip_image_pil(inpt) - return horizontal_flip_image_tensor(inpt) + else: + return horizontal_flip_image_tensor(inpt) vertical_flip_image_tensor = _FT.vflip @@ -79,9 +80,10 @@ def vertical_flip_bounding_box( def vertical_flip(inpt: DType) -> DType: if isinstance(inpt, features._Feature): return inpt.vertical_flip() - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): return vertical_flip_image_pil(inpt) - return vertical_flip_image_tensor(inpt) + else: + return vertical_flip_image_tensor(inpt) def resize_image_tensor( @@ -141,13 +143,13 @@ def resize( if isinstance(inpt, features._Feature): antialias = False if antialias is None else antialias return inpt.resize(size, interpolation=interpolation, max_size=max_size, antialias=antialias) - if isinstance(inpt, PIL.Image.Image): + elif isinstance(inpt, PIL.Image.Image): if antialias is not None and not antialias: warnings.warn("Anti-alias option is always applied for PIL Image input. 
             warnings.warn("Anti-alias option is always applied for PIL Image input. Argument antialias is ignored.")
         return resize_image_pil(inpt, size, interpolation=interpolation, max_size=max_size)
-
-    antialias = False if antialias is None else antialias
-    return resize_image_tensor(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)
+    else:
+        antialias = False if antialias is None else antialias
+        return resize_image_tensor(inpt, size, interpolation=interpolation, max_size=max_size, antialias=antialias)


 def _affine_parse_args(
@@ -393,7 +395,7 @@ def affine(
         return inpt.affine(
             angle, translate=translate, scale=scale, shear=shear, interpolation=interpolation, fill=fill, center=center
         )
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return affine_image_pil(
             inpt,
             angle,
@@ -404,19 +406,19 @@ def affine(
             fill=fill,
             center=center,
         )
+    else:
+        fill = _convert_fill_arg(fill)
-    fill = _convert_fill_arg(fill)
-
-    return affine_image_tensor(
-        inpt,
-        angle,
-        translate=translate,
-        scale=scale,
-        shear=shear,
-        interpolation=interpolation,
-        fill=fill,
-        center=center,
-    )
+        return affine_image_tensor(
+            inpt,
+            angle,
+            translate=translate,
+            scale=scale,
+            shear=shear,
+            interpolation=interpolation,
+            fill=fill,
+            center=center,
+        )


 def rotate_image_tensor(
@@ -444,7 +446,8 @@ def rotate_image_tensor(
     # we need to set -angle.
     matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
     output = _FT.rotate(img, matrix, interpolation=interpolation.value, expand=expand, fill=fill)
-    return output.view(extra_dims + (num_channels, height, width))
+    new_height, new_width = output.shape[-2:]
+    return output.view(extra_dims + (num_channels, new_height, new_width))


 def rotate_image_pil(
@@ -513,12 +516,12 @@ def rotate(
 ) -> DType:
     if isinstance(inpt, features._Feature):
         return inpt.rotate(angle, interpolation=interpolation, expand=expand, fill=fill, center=center)
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return rotate_image_pil(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center)
+    else:
+        fill = _convert_fill_arg(fill)
-    fill = _convert_fill_arg(fill)
-
-    return rotate_image_tensor(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center)
+        return rotate_image_tensor(inpt, angle, interpolation=interpolation, expand=expand, fill=fill, center=center)


 pad_image_pil = _FP.pad
@@ -601,21 +604,20 @@ def pad(
 ) -> DType:
     if isinstance(inpt, features._Feature):
         return inpt.pad(padding, fill=fill, padding_mode=padding_mode)
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return pad_image_pil(inpt, padding, fill=fill, padding_mode=padding_mode)
+    else:
+        # This cast does Sequence[int] -> List[int] and is required to make mypy happy
+        if not isinstance(padding, int):
+            padding = list(padding)
-    # This cast does Sequence[int] -> List[int] and is required to make mypy happy
-    if not isinstance(padding, int):
-        padding = list(padding)
-
-    if fill is None:
-        fill = 0
-
-    # TODO: PyTorch's pad supports only scalars on fill. So we need to overwrite the colour
-    if isinstance(fill, (int, float)):
-        return pad_image_tensor(inpt, padding, fill=fill, padding_mode=padding_mode)
+        if fill is None:
+            fill = 0
-    return _pad_with_vector_fill(inpt, padding, fill=fill, padding_mode=padding_mode)
+        # TODO: PyTorch's pad supports only scalars on fill. So we need to overwrite the colour
+        if isinstance(fill, (int, float)):
+            return pad_image_tensor(inpt, padding, fill=fill, padding_mode=padding_mode)
+        return _pad_with_vector_fill(inpt, padding, fill=fill, padding_mode=padding_mode)


 crop_image_tensor = _FT.crop
@@ -648,9 +650,10 @@ def crop_segmentation_mask(img: torch.Tensor, top: int, left: int, height: int,
 def crop(inpt: DType, top: int, left: int, height: int, width: int) -> DType:
     if isinstance(inpt, features._Feature):
         return inpt.crop(top, left, height, width)
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return crop_image_pil(inpt, top, left, height, width)
-    return crop_image_tensor(inpt, top, left, height, width)
+    else:
+        return crop_image_tensor(inpt, top, left, height, width)


 def perspective_image_tensor(
@@ -768,20 +771,21 @@ def perspective(
 ) -> DType:
     if isinstance(inpt, features._Feature):
         return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill)
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return perspective_image_pil(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+    else:
+        fill = _convert_fill_arg(fill)
-    fill = _convert_fill_arg(fill)
-
-    return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)
+        return perspective_image_tensor(inpt, perspective_coeffs, interpolation=interpolation, fill=fill)


 def _center_crop_parse_output_size(output_size: List[int]) -> List[int]:
     if isinstance(output_size, numbers.Number):
         return [int(output_size), int(output_size)]
-    if isinstance(output_size, (tuple, list)) and len(output_size) == 1:
+    elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
         return [output_size[0], output_size[0]]
-    return list(output_size)
+    else:
+        return list(output_size)


 def _center_crop_compute_padding(crop_height: int, crop_width: int, image_height: int, image_width: int) -> List[int]:
@@ -851,9 +855,10 @@ def center_crop_segmentation_mask(segmentation_mask: torch.Tensor, output_size:
 def center_crop(inpt: DType, output_size: List[int]) -> DType:
     if isinstance(inpt, features._Feature):
         return inpt.center_crop(output_size)
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return center_crop_image_pil(inpt, output_size)
-    return center_crop_image_tensor(inpt, output_size)
+    else:
+        return center_crop_image_tensor(inpt, output_size)


 def resized_crop_image_tensor(
@@ -921,12 +926,13 @@ def resized_crop(
     if isinstance(inpt, features._Feature):
         antialias = False if antialias is None else antialias
         return inpt.resized_crop(top, left, height, width, antialias=antialias, size=size, interpolation=interpolation)
-    if isinstance(inpt, PIL.Image.Image):
+    elif isinstance(inpt, PIL.Image.Image):
         return resized_crop_image_pil(inpt, top, left, height, width, size=size, interpolation=interpolation)
-    antialias = False if antialias is None else antialias
-    return resized_crop_image_tensor(
-        inpt, top, left, height, width, antialias=antialias, size=size, interpolation=interpolation
-    )
+    else:
+        antialias = False if antialias is None else antialias
+        return resized_crop_image_tensor(
+            inpt, top, left, height, width, antialias=antialias, size=size, interpolation=interpolation
+        )


 def _parse_five_crop_size(size: List[int]) -> List[int]:

From 3ce23ef43ac70db6ff8d5b6fbbe10f9d5d691bcf Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Fri, 8 Jul 2022 18:01:09 +0200
Subject: [PATCH 13/14] Fixed acceptable and non-acceptable types for
 Cutmix/Mixup

---
 test/test_prototype_transforms.py            | 22 +++++++++++++++++++-
 torchvision/prototype/transforms/_augment.py |  8 ++++---
 2 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index 2c8f65e3086..1934a8bd408 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -3,7 +3,13 @@
 import pytest
 import torch
 from common_utils import assert_equal
-from test_prototype_transforms_functional import make_images, make_bounding_boxes, make_one_hot_labels
+from test_prototype_transforms_functional import (
+    make_images,
+    make_bounding_boxes,
+    make_one_hot_labels,
+    make_segmentation_masks,
+    make_label,
+)
 from torchvision.prototype import transforms, features
 from torchvision.transforms.functional import to_pil_image, pil_to_tensor

@@ -102,6 +108,20 @@ def test_common(self, transform, input):
     def test_mixup_cutmix(self, transform, input):
         transform(input)

+    @pytest.mark.parametrize("transform", [transforms.RandomMixup(alpha=1.0), transforms.RandomCutmix(alpha=1.0)])
+    def test_mixup_cutmix_assertions(self, transform):
+        for bbox in make_bounding_boxes():
+            with pytest.raises(TypeError, match="does not support"):
+                transform(bbox)
+            break
+        for mask in make_segmentation_masks():
+            with pytest.raises(TypeError, match="does not support"):
+                transform(mask)
+            break
+        label = make_label()
+        with pytest.raises(TypeError, match="does not support"):
+            transform(label)
+
     @parametrize(
         [
             (
diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py
index 9a2c2c0f416..df1dd916467 100644
--- a/torchvision/prototype/transforms/_augment.py
+++ b/torchvision/prototype/transforms/_augment.py
@@ -9,7 +9,7 @@
 from torchvision.prototype.transforms import Transform, functional as F

 from ._transform import _RandomApplyTransform
-from ._utils import query_image, get_image_dimensions, has_all
+from ._utils import query_image, get_image_dimensions, has_any


 class RandomErasing(_RandomApplyTransform):
@@ -106,8 +106,10 @@ def __init__(self, *, alpha: float) -> None:

     def forward(self, *inpts: Any) -> Any:
         sample = inpts if len(inpts) > 1 else inpts[0]
-        if not has_all(sample, features.Image, features.OneHotLabel):
-            raise TypeError(f"{type(self).__name__}() is only defined for Image's *and* OneHotLabel's.")
+        if has_any(sample, features.BoundingBox, features.SegmentationMask, features.Label):
+            raise TypeError(
+                f"{type(self).__name__}() does not support bounding boxes, segmentation masks and plain labels."
+            )
         return super().forward(sample)

     def _mixup_onehotlabel(self, inpt: features.OneHotLabel, lam: float) -> features.OneHotLabel:

From 12adbe800ec6ca6f4b963d5616551ea39b20713e Mon Sep 17 00:00:00 2001
From: vfdev-5
Date: Fri, 8 Jul 2022 18:18:54 +0200
Subject: [PATCH 14/14] Updated conditions for _BaseMixupCutmix

---
 test/test_prototype_transforms.py            | 16 ----------------
 torchvision/prototype/transforms/_augment.py |  4 +++-
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index 1934a8bd408..eb92af41071 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -7,8 +7,6 @@
     make_images,
     make_bounding_boxes,
     make_one_hot_labels,
-    make_segmentation_masks,
-    make_label,
 )
 from torchvision.prototype import transforms, features
 from torchvision.transforms.functional import to_pil_image, pil_to_tensor
@@ -108,20 +106,6 @@ def test_common(self, transform, input):
     def test_mixup_cutmix(self, transform, input):
         transform(input)

-    @pytest.mark.parametrize("transform", [transforms.RandomMixup(alpha=1.0), transforms.RandomCutmix(alpha=1.0)])
-    def test_mixup_cutmix_assertions(self, transform):
-        for bbox in make_bounding_boxes():
-            with pytest.raises(TypeError, match="does not support"):
-                transform(bbox)
-            break
-        for mask in make_segmentation_masks():
-            with pytest.raises(TypeError, match="does not support"):
-                transform(mask)
-            break
-        label = make_label()
-        with pytest.raises(TypeError, match="does not support"):
-            transform(label)
-
     @parametrize(
         [
             (
diff --git a/torchvision/prototype/transforms/_augment.py b/torchvision/prototype/transforms/_augment.py
index df1dd916467..8ed81eef8f2 100644
--- a/torchvision/prototype/transforms/_augment.py
+++ b/torchvision/prototype/transforms/_augment.py
@@ -9,7 +9,7 @@
 from torchvision.prototype.transforms import Transform, functional as F

 from ._transform import _RandomApplyTransform
-from ._utils import query_image, get_image_dimensions, has_any
+from ._utils import query_image, get_image_dimensions, has_any, has_all


 class RandomErasing(_RandomApplyTransform):
@@ -106,6 +106,8 @@ def __init__(self, *, alpha: float) -> None:

     def forward(self, *inpts: Any) -> Any:
         sample = inpts if len(inpts) > 1 else inpts[0]
+        if not has_all(sample, features.Image, features.OneHotLabel):
+            raise TypeError(f"{type(self).__name__}() is only defined for Image's *and* OneHotLabel's.")
         if has_any(sample, features.BoundingBox, features.SegmentationMask, features.Label):
             raise TypeError(
                 f"{type(self).__name__}() does not support bounding boxes, segmentation masks and plain labels."