diff --git a/test/prototype_transforms_kernel_infos.py b/test/prototype_transforms_kernel_infos.py index efb16acb56c..b41e8409d54 100644 --- a/test/prototype_transforms_kernel_infos.py +++ b/test/prototype_transforms_kernel_infos.py @@ -11,8 +11,8 @@ import torchvision.prototype.transforms.functional as F from datasets_utils import combinations_grid from prototype_common_utils import ArgsKwargs, make_bounding_box_loaders, make_image_loaders, make_mask_loaders - from torchvision.prototype import features +from torchvision.transforms.functional_tensor import _max_value as get_max_value __all__ = ["KernelInfo", "KERNEL_INFOS"] @@ -219,7 +219,7 @@ def reference_inputs_resize_mask(): def sample_inputs_affine_image_tensor(): for image_loader, interpolation_mode, center in itertools.product( - make_image_loaders(dtypes=[torch.float32]), + make_image_loaders(sizes=["random"], dtypes=[torch.float32]), [ F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR, @@ -336,7 +336,7 @@ def reference_inputs_affine_bounding_box(): def sample_inputs_affine_image_mask(): for mask_loader, center in itertools.product( - make_mask_loaders(dtypes=[torch.uint8]), + make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]), [None, (0, 0)], ): yield ArgsKwargs(mask_loader, center=center, **_AFFINE_KWARGS[0]) @@ -451,3 +451,775 @@ def reference_inputs_convert_color_space_image_tensor(): closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, ), ) + + +def sample_inputs_vertical_flip_image_tensor(): + for image_loader in make_image_loaders(sizes=["random"], dtypes=[torch.float32]): + yield ArgsKwargs(image_loader) + + +def reference_inputs_vertical_flip_image_tensor(): + for image_loader in make_image_loaders(extra_dims=[()]): + yield ArgsKwargs(image_loader) + + +def sample_inputs_vertical_flip_bounding_box(): + for bounding_box_loader in make_bounding_box_loaders( + formats=[features.BoundingBoxFormat.XYXY], dtypes=[torch.float32] + ): + yield ArgsKwargs( + bounding_box_loader, format=bounding_box_loader.format, image_size=bounding_box_loader.image_size + ) + + +def sample_inputs_vertical_flip_mask(): + for image_loader in make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]): + yield ArgsKwargs(image_loader) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.vertical_flip_image_tensor, + kernel_name="vertical_flip_image_tensor", + sample_inputs_fn=sample_inputs_vertical_flip_image_tensor, + reference_fn=pil_reference_wrapper(F.vertical_flip_image_pil), + reference_inputs_fn=reference_inputs_vertical_flip_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.vertical_flip_bounding_box, + sample_inputs_fn=sample_inputs_vertical_flip_bounding_box, + ), + KernelInfo( + F.vertical_flip_mask, + sample_inputs_fn=sample_inputs_vertical_flip_mask, + ), + ] +) + +_ROTATE_ANGLES = [-87, 15, 90] + + +def sample_inputs_rotate_image_tensor(): + for image_loader, params in itertools.product( + make_image_loaders(sizes=["random"], dtypes=[torch.float32]), + combinations_grid( + interpolation=[F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], + expand=[True, False], + center=[None, (0, 0)], + ), + ): + if params["center"] is not None and params["expand"]: + # Otherwise this will emit a warning and ignore center anyway + continue + + for fill in [None, 0.5, [0.5] * image_loader.num_channels]: + yield ArgsKwargs( + image_loader, + angle=_ROTATE_ANGLES[0], + fill=fill, + **params, + ) + + +def reference_inputs_rotate_image_tensor(): + for image_loader, angle in 
itertools.product(make_image_loaders(extra_dims=[()]), _ROTATE_ANGLES): + yield ArgsKwargs(image_loader, angle=angle) + + +def sample_inputs_rotate_bounding_box(): + for bounding_box_loader in make_bounding_box_loaders(): + yield ArgsKwargs( + bounding_box_loader, + format=bounding_box_loader.format, + image_size=bounding_box_loader.image_size, + angle=_ROTATE_ANGLES[0], + ) + + +def sample_inputs_rotate_mask(): + for image_loader, params in itertools.product( + make_image_loaders(sizes=["random"], dtypes=[torch.uint8]), + combinations_grid( + expand=[True, False], + center=[None, (0, 0)], + ), + ): + if params["center"] is not None and params["expand"]: + # Otherwise this will emit a warning and ignore center anyway + continue + + yield ArgsKwargs( + image_loader, + angle=_ROTATE_ANGLES[0], + **params, + ) + + +@pil_reference_wrapper +def reference_rotate_mask(*args, **kwargs): + return F.rotate_image_pil(*args, interpolation=F.InterpolationMode.NEAREST, **kwargs) + + +def reference_inputs_rotate_mask(): + for mask_loader, angle in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _ROTATE_ANGLES): + yield ArgsKwargs(mask_loader, angle=angle) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.rotate_image_tensor, + sample_inputs_fn=sample_inputs_rotate_image_tensor, + reference_fn=pil_reference_wrapper(F.rotate_image_pil), + reference_inputs_fn=reference_inputs_rotate_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.rotate_bounding_box, + sample_inputs_fn=sample_inputs_rotate_bounding_box, + ), + KernelInfo( + F.rotate_mask, + sample_inputs_fn=sample_inputs_rotate_mask, + reference_fn=reference_rotate_mask, + reference_inputs_fn=reference_inputs_rotate_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + +_CROP_PARAMS = combinations_grid(top=[-8, 0, 9], left=[-8, 0, 9], height=[12, 20], width=[12, 20]) + + +def sample_inputs_crop_image_tensor(): + for image_loader, params in itertools.product(make_image_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]): + yield ArgsKwargs(image_loader, **params) + + +def reference_inputs_crop_image_tensor(): + for image_loader, params in itertools.product(make_image_loaders(extra_dims=[()]), _CROP_PARAMS): + yield ArgsKwargs(image_loader, **params) + + +def sample_inputs_crop_bounding_box(): + for bounding_box_loader, params in itertools.product( + make_bounding_box_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]] + ): + yield ArgsKwargs(bounding_box_loader, format=bounding_box_loader.format, top=params["top"], left=params["left"]) + + +def sample_inputs_crop_mask(): + for mask_loader, params in itertools.product(make_mask_loaders(), [_CROP_PARAMS[0], _CROP_PARAMS[-1]]): + yield ArgsKwargs(mask_loader, **params) + + +def reference_inputs_crop_mask(): + for mask_loader, params in itertools.product(make_mask_loaders(extra_dims=[()], num_objects=[1]), _CROP_PARAMS): + yield ArgsKwargs(mask_loader, **params) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.crop_image_tensor, + kernel_name="crop_image_tensor", + sample_inputs_fn=sample_inputs_crop_image_tensor, + reference_fn=pil_reference_wrapper(F.crop_image_pil), + reference_inputs_fn=reference_inputs_crop_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.crop_bounding_box, + sample_inputs_fn=sample_inputs_crop_bounding_box, + ), + KernelInfo( + F.crop_mask, + sample_inputs_fn=sample_inputs_crop_mask, + reference_fn=pil_reference_wrapper(F.crop_image_pil), + 
reference_inputs_fn=reference_inputs_crop_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + +_RESIZED_CROP_PARAMS = combinations_grid(top=[-8, 9], left=[-8, 9], height=[12], width=[12], size=[(16, 18)]) + + +def sample_inputs_resized_crop_image_tensor(): + for image_loader in make_image_loaders(): + yield ArgsKwargs(image_loader, **_RESIZED_CROP_PARAMS[0]) + + +@pil_reference_wrapper +def reference_resized_crop_image_tensor(*args, **kwargs): + if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in { + F.InterpolationMode.BILINEAR, + F.InterpolationMode.BICUBIC, + }: + raise pytest.UsageError("Anti-aliasing is always active in PIL") + return F.resized_crop_image_pil(*args, **kwargs) + + +def reference_inputs_resized_crop_image_tensor(): + for image_loader, interpolation, params in itertools.product( + make_image_loaders(extra_dims=[()]), + [ + F.InterpolationMode.NEAREST, + F.InterpolationMode.BILINEAR, + F.InterpolationMode.BICUBIC, + ], + _RESIZED_CROP_PARAMS, + ): + yield ArgsKwargs( + image_loader, + interpolation=interpolation, + antialias=interpolation + in { + F.InterpolationMode.BILINEAR, + F.InterpolationMode.BICUBIC, + }, + **params, + ) + + +def sample_inputs_resized_crop_bounding_box(): + for bounding_box_loader in make_bounding_box_loaders(): + yield ArgsKwargs(bounding_box_loader, format=bounding_box_loader.format, **_RESIZED_CROP_PARAMS[0]) + + +def sample_inputs_resized_crop_mask(): + for mask_loader in make_mask_loaders(): + yield ArgsKwargs(mask_loader, **_RESIZED_CROP_PARAMS[0]) + + +def reference_inputs_resized_crop_mask(): + for mask_loader, params in itertools.product( + make_mask_loaders(extra_dims=[()], num_objects=[1]), _RESIZED_CROP_PARAMS + ): + yield ArgsKwargs(mask_loader, **params) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.resized_crop_image_tensor, + sample_inputs_fn=sample_inputs_resized_crop_image_tensor, + reference_fn=reference_resized_crop_image_tensor, + reference_inputs_fn=reference_inputs_resized_crop_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.resized_crop_bounding_box, + sample_inputs_fn=sample_inputs_resized_crop_bounding_box, + ), + KernelInfo( + F.resized_crop_mask, + sample_inputs_fn=sample_inputs_resized_crop_mask, + reference_fn=pil_reference_wrapper(F.resized_crop_image_pil), + reference_inputs_fn=reference_inputs_resized_crop_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + +_PAD_PARAMS = combinations_grid( + padding=[[1], [1, 1], [1, 1, 2, 2]], + padding_mode=["constant", "symmetric", "edge", "reflect"], +) + + +def sample_inputs_pad_image_tensor(): + for image_loader, params in itertools.product(make_image_loaders(sizes=["random"]), _PAD_PARAMS): + fills = [None, 128.0, 128, [12.0]] + if params["padding_mode"] == "constant": + fills.append([12.0 + c for c in range(image_loader.num_channels)]) + for fill in fills: + yield ArgsKwargs(image_loader, fill=fill, **params) + + +def reference_inputs_pad_image_tensor(): + for image_loader, params in itertools.product(make_image_loaders(extra_dims=[()]), _PAD_PARAMS): + # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it? 
+ fills = [None, 128.0, 128] + if params["padding_mode"] == "constant": + fills.append([12.0 + c for c in range(image_loader.num_channels)]) + for fill in fills: + yield ArgsKwargs(image_loader, fill=fill, **params) + + +def sample_inputs_pad_bounding_box(): + for bounding_box_loader, params in itertools.product(make_bounding_box_loaders(), _PAD_PARAMS): + if params["padding_mode"] != "constant": + continue + + yield ArgsKwargs(bounding_box_loader, format=bounding_box_loader.format, **params) + + +def sample_inputs_pad_mask(): + for image_loader, fill, params in itertools.product(make_mask_loaders(sizes=["random"]), [None, 127], _PAD_PARAMS): + yield ArgsKwargs(image_loader, fill=fill, **params) + + +def reference_inputs_pad_mask(): + for image_loader, fill, params in itertools.product(make_image_loaders(extra_dims=[()]), [None, 127], _PAD_PARAMS): + yield ArgsKwargs(image_loader, fill=fill, **params) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.pad_image_tensor, + sample_inputs_fn=sample_inputs_pad_image_tensor, + reference_fn=pil_reference_wrapper(F.pad_image_pil), + reference_inputs_fn=reference_inputs_pad_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.pad_bounding_box, + sample_inputs_fn=sample_inputs_pad_bounding_box, + ), + KernelInfo( + F.pad_mask, + sample_inputs_fn=sample_inputs_pad_mask, + reference_fn=pil_reference_wrapper(F.pad_image_pil), + reference_inputs_fn=reference_inputs_pad_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + +_PERSPECTIVE_COEFFS = [ + [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018], + [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063], +] + + +def sample_inputs_perspective_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ): + for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]: + yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=_PERSPECTIVE_COEFFS[0]) + + +def reference_inputs_perspective_image_tensor(): + for image_loader, perspective_coeffs in itertools.product(make_image_loaders(extra_dims=[()]), _PERSPECTIVE_COEFFS): + # FIXME: PIL kernel doesn't support sequences of length 1 if the number of channels is larger. Shouldn't it? 
+ for fill in [None, 128.0, 128, [12.0 + c for c in range(image_loader.num_channels)]]: + yield ArgsKwargs(image_loader, fill=fill, perspective_coeffs=perspective_coeffs) + + +def sample_inputs_perspective_bounding_box(): + for bounding_box_loader in make_bounding_box_loaders(): + yield ArgsKwargs( + bounding_box_loader, format=bounding_box_loader.format, perspective_coeffs=_PERSPECTIVE_COEFFS[0] + ) + + +def sample_inputs_perspective_mask(): + for mask_loader in make_mask_loaders( + sizes=["random"], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ): + yield ArgsKwargs(mask_loader, perspective_coeffs=_PERSPECTIVE_COEFFS[0]) + + +def reference_inputs_perspective_mask(): + for mask_loader, perspective_coeffs in itertools.product( + make_mask_loaders(extra_dims=[()], num_objects=[1]), _PERSPECTIVE_COEFFS + ): + yield ArgsKwargs(mask_loader, perspective_coeffs=perspective_coeffs) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.perspective_image_tensor, + sample_inputs_fn=sample_inputs_perspective_image_tensor, + reference_fn=pil_reference_wrapper(F.perspective_image_pil), + reference_inputs_fn=reference_inputs_perspective_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.perspective_bounding_box, + sample_inputs_fn=sample_inputs_perspective_bounding_box, + ), + KernelInfo( + F.perspective_mask, + sample_inputs_fn=sample_inputs_perspective_mask, + reference_fn=pil_reference_wrapper(F.perspective_image_pil), + reference_inputs_fn=reference_inputs_perspective_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + + +def _get_elastic_displacement(image_size): + return torch.rand(1, *image_size, 2) + + +def sample_inputs_elastic_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ): + displacement = _get_elastic_displacement(image_loader.image_size) + for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]: + yield ArgsKwargs(image_loader, displacement=displacement, fill=fill) + + +def reference_inputs_elastic_image_tensor(): + for image_loader, interpolation in itertools.product( + make_image_loaders(extra_dims=[()]), + [ + F.InterpolationMode.NEAREST, + F.InterpolationMode.BILINEAR, + F.InterpolationMode.BICUBIC, + ], + ): + displacement = _get_elastic_displacement(image_loader.image_size) + for fill in [None, 128.0, 128, [12.0], [12.0 + c for c in range(image_loader.num_channels)]]: + yield ArgsKwargs(image_loader, interpolation=interpolation, displacement=displacement, fill=fill) + + +def sample_inputs_elastic_bounding_box(): + for bounding_box_loader in make_bounding_box_loaders(): + displacement = _get_elastic_displacement(bounding_box_loader.image_size) + yield ArgsKwargs( + bounding_box_loader, + format=bounding_box_loader.format, + displacement=displacement, + ) + + +def sample_inputs_elastic_mask(): + for mask_loader in make_mask_loaders( + sizes=["random"], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ): + displacement = _get_elastic_displacement(mask_loader.shape[-2:]) + yield ArgsKwargs(mask_loader, displacement=displacement) + + +def reference_inputs_elastic_mask(): + for mask_loader in make_mask_loaders(extra_dims=[()], num_objects=[1]): + displacement = _get_elastic_displacement(mask_loader.shape[-2:]) + yield ArgsKwargs(mask_loader, displacement=displacement) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + 
F.elastic_image_tensor, + sample_inputs_fn=sample_inputs_elastic_image_tensor, + reference_fn=pil_reference_wrapper(F.elastic_image_pil), + reference_inputs_fn=reference_inputs_elastic_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.elastic_bounding_box, + sample_inputs_fn=sample_inputs_elastic_bounding_box, + ), + KernelInfo( + F.elastic_mask, + sample_inputs_fn=sample_inputs_elastic_mask, + reference_fn=pil_reference_wrapper(F.elastic_image_pil), + reference_inputs_fn=reference_inputs_elastic_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + + +_CENTER_CROP_IMAGE_SIZES = [(16, 16), (7, 33), (31, 9)] +_CENTER_CROP_OUTPUT_SIZES = [[4, 3], [42, 70], [4]] + + +def sample_inputs_center_crop_image_tensor(): + for image_loader, output_size in itertools.product( + make_image_loaders(sizes=_CENTER_CROP_IMAGE_SIZES), _CENTER_CROP_OUTPUT_SIZES + ): + yield ArgsKwargs(image_loader, output_size=output_size) + + +def reference_inputs_center_crop_image_tensor(): + for image_loader, output_size in itertools.product( + make_image_loaders(sizes=_CENTER_CROP_IMAGE_SIZES, extra_dims=[()]), _CENTER_CROP_OUTPUT_SIZES + ): + yield ArgsKwargs(image_loader, output_size=output_size) + + +def sample_inputs_center_crop_bounding_box(): + for bounding_box_loader, output_size in itertools.product(make_bounding_box_loaders(), _CENTER_CROP_OUTPUT_SIZES): + yield ArgsKwargs( + bounding_box_loader, + format=bounding_box_loader.format, + image_size=bounding_box_loader.image_size, + output_size=output_size, + ) + + +def sample_inputs_center_crop_mask(): + for mask_loader, output_size in itertools.product( + make_mask_loaders(sizes=_CENTER_CROP_IMAGE_SIZES), _CENTER_CROP_OUTPUT_SIZES + ): + yield ArgsKwargs(mask_loader, output_size=output_size) + + +def reference_inputs_center_crop_mask(): + for mask_loader, output_size in itertools.product( + make_mask_loaders(sizes=_CENTER_CROP_IMAGE_SIZES, extra_dims=[()], num_objects=[1]), _CENTER_CROP_OUTPUT_SIZES + ): + yield ArgsKwargs(mask_loader, output_size=output_size) + + +KERNEL_INFOS.extend( + [ + KernelInfo( + F.center_crop_image_tensor, + sample_inputs_fn=sample_inputs_center_crop_image_tensor, + reference_fn=pil_reference_wrapper(F.center_crop_image_pil), + reference_inputs_fn=reference_inputs_center_crop_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + KernelInfo( + F.center_crop_bounding_box, + sample_inputs_fn=sample_inputs_center_crop_bounding_box, + ), + KernelInfo( + F.center_crop_mask, + sample_inputs_fn=sample_inputs_center_crop_mask, + reference_fn=pil_reference_wrapper(F.center_crop_image_pil), + reference_inputs_fn=reference_inputs_center_crop_mask, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ), + ] +) + + +def sample_inputs_gaussian_blur_image_tensor(): + for image_loader, params in itertools.product( + make_image_loaders( + sizes=["random"], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ), + combinations_grid( + kernel_size=[(3, 3)], + sigma=[None, (3.0, 3.0)], + ), + ): + yield ArgsKwargs(image_loader, **params) + + +KERNEL_INFOS.append( + KernelInfo( + F.gaussian_blur_image_tensor, + sample_inputs_fn=sample_inputs_gaussian_blur_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + + +def sample_inputs_equalize_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + color_spaces=(features.ColorSpace.GRAY, 
features.ColorSpace.RGB), + dtypes=[torch.uint8], + ): + yield ArgsKwargs(image_loader) + + +def reference_inputs_equalize_image_tensor(): + for image_loader in make_image_loaders( + extra_dims=[()], color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), dtypes=[torch.uint8] + ): + yield ArgsKwargs(image_loader) + + +KERNEL_INFOS.append( + KernelInfo( + F.equalize_image_tensor, + kernel_name="equalize_image_tensor", + sample_inputs_fn=sample_inputs_equalize_image_tensor, + reference_fn=pil_reference_wrapper(F.equalize_image_pil), + reference_inputs_fn=reference_inputs_equalize_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + + +def sample_inputs_invert_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB) + ): + yield ArgsKwargs(image_loader) + + +def reference_inputs_invert_image_tensor(): + for image_loader in make_image_loaders( + color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()] + ): + yield ArgsKwargs(image_loader) + + +KERNEL_INFOS.append( + KernelInfo( + F.invert_image_tensor, + kernel_name="invert_image_tensor", + sample_inputs_fn=sample_inputs_invert_image_tensor, + reference_fn=pil_reference_wrapper(F.invert_image_pil), + reference_inputs_fn=reference_inputs_invert_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + + +_POSTERIZE_BITS = [1, 4, 8] + + +def sample_inputs_posterize_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), dtypes=[torch.uint8] + ): + yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0]) + + +def reference_inputs_posterize_image_tensor(): + for image_loader, bits in itertools.product( + make_image_loaders( + color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()], dtypes=[torch.uint8] + ), + _POSTERIZE_BITS, + ): + yield ArgsKwargs(image_loader, bits=bits) + + +KERNEL_INFOS.append( + KernelInfo( + F.posterize_image_tensor, + kernel_name="posterize_image_tensor", + sample_inputs_fn=sample_inputs_posterize_image_tensor, + reference_fn=pil_reference_wrapper(F.posterize_image_pil), + reference_inputs_fn=reference_inputs_posterize_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + + +def _get_solarize_thresholds(dtype): + for factor in [0.1, 0.5]: + max_value = get_max_value(dtype) + yield (float if dtype.is_floating_point else int)(max_value * factor) + + +def sample_inputs_solarize_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random"], color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB) + ): + yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype))) + + +def reference_inputs_solarize_image_tensor(): + for image_loader in make_image_loaders( + color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()] + ): + for threshold in _get_solarize_thresholds(image_loader.dtype): + yield ArgsKwargs(image_loader, threshold=threshold) + + +KERNEL_INFOS.append( + KernelInfo( + F.solarize_image_tensor, + kernel_name="solarize_image_tensor", + sample_inputs_fn=sample_inputs_solarize_image_tensor, + reference_fn=pil_reference_wrapper(F.solarize_image_pil), + reference_inputs_fn=reference_inputs_solarize_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + + +def sample_inputs_autocontrast_image_tensor(): + for image_loader in make_image_loaders( + 
sizes=["random"], color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB) + ): + yield ArgsKwargs(image_loader) + + +def reference_inputs_autocontrast_image_tensor(): + for image_loader in make_image_loaders( + color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()] + ): + yield ArgsKwargs(image_loader) + + +KERNEL_INFOS.append( + KernelInfo( + F.autocontrast_image_tensor, + kernel_name="autocontrast_image_tensor", + sample_inputs_fn=sample_inputs_autocontrast_image_tensor, + reference_fn=pil_reference_wrapper(F.autocontrast_image_pil), + reference_inputs_fn=reference_inputs_autocontrast_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + +_ADJUST_SHARPNESS_FACTORS = [0.1, 0.5] + + +def sample_inputs_adjust_sharpness_image_tensor(): + for image_loader in make_image_loaders( + sizes=["random", (2, 2)], + color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), + # FIXME: kernel should support arbitrary batch sizes + extra_dims=[(), (4,)], + ): + yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) + + +def reference_inputs_adjust_sharpness_image_tensor(): + for image_loader, sharpness_factor in itertools.product( + make_image_loaders(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB), extra_dims=[()]), + _ADJUST_SHARPNESS_FACTORS, + ): + yield ArgsKwargs(image_loader, sharpness_factor=sharpness_factor) + + +KERNEL_INFOS.append( + KernelInfo( + F.adjust_sharpness_image_tensor, + kernel_name="adjust_sharpness_image_tensor", + sample_inputs_fn=sample_inputs_adjust_sharpness_image_tensor, + reference_fn=pil_reference_wrapper(F.adjust_sharpness_image_pil), + reference_inputs_fn=reference_inputs_adjust_sharpness_image_tensor, + closeness_kwargs=DEFAULT_IMAGE_CLOSENESS_KWARGS, + ) +) + + +def sample_inputs_erase_image_tensor(): + for image_loader in make_image_loaders(sizes=["random"]): + # FIXME: make the parameters more diverse + h, w = 6, 7 + v = torch.rand(image_loader.num_channels, h, w) + yield ArgsKwargs(image_loader, i=1, j=2, h=h, w=w, v=v) + + +KERNEL_INFOS.append( + KernelInfo( + F.erase_image_tensor, + kernel_name="erase_image_tensor", + sample_inputs_fn=sample_inputs_erase_image_tensor, + ) +) diff --git a/test/test_prototype_transforms_functional.py b/test/test_prototype_transforms_functional.py index 4c0b3ed87c3..cf85ce8c34a 100644 --- a/test/test_prototype_transforms_functional.py +++ b/test/test_prototype_transforms_functional.py @@ -1,4 +1,3 @@ -import itertools import math import os @@ -8,14 +7,7 @@ import torch.testing import torchvision.prototype.transforms.functional as F from common_utils import cpu_and_gpu -from prototype_common_utils import ( - ArgsKwargs, - make_bounding_boxes, - make_detection_masks, - make_image, - make_images, - make_masks, -) +from prototype_common_utils import ArgsKwargs, make_bounding_boxes, make_image from torch import jit from torchvision.prototype import features from torchvision.prototype.transforms.functional._geometry import _center_crop_compute_padding @@ -48,365 +40,6 @@ def register_kernel_info_from_sample_inputs_fn(sample_inputs_fn): return sample_inputs_fn -@register_kernel_info_from_sample_inputs_fn -def vertical_flip_image_tensor(): - for image in make_images(): - yield ArgsKwargs(image) - - -@register_kernel_info_from_sample_inputs_fn -def vertical_flip_bounding_box(): - for bounding_box in make_bounding_boxes(formats=[features.BoundingBoxFormat.XYXY]): - yield ArgsKwargs(bounding_box, format=bounding_box.format, 
image_size=bounding_box.image_size) - - -@register_kernel_info_from_sample_inputs_fn -def vertical_flip_mask(): - for mask in make_masks(): - yield ArgsKwargs(mask) - - -@register_kernel_info_from_sample_inputs_fn -def rotate_image_tensor(): - for image, angle, expand, center in itertools.product( - make_images(), - [-87, 15, 90], # angle - [True, False], # expand - [None, [12, 23]], # center - ): - if center is not None and expand: - # Skip warning: The provided center argument is ignored if expand is True - continue - - yield ArgsKwargs(image, angle=angle, expand=expand, center=center, fill=None) - - for fill in [None, 128.0, 128, [12.0], [1.0, 2.0, 3.0]]: - yield ArgsKwargs(image, angle=23, expand=False, center=None, fill=fill) - - -@register_kernel_info_from_sample_inputs_fn -def rotate_bounding_box(): - for bounding_box, angle, expand, center in itertools.product( - make_bounding_boxes(), [-87, 15, 90], [True, False], [None, [12, 23]] - ): - if center is not None and expand: - # Skip warning: The provided center argument is ignored if expand is True - continue - - yield ArgsKwargs( - bounding_box, - format=bounding_box.format, - image_size=bounding_box.image_size, - angle=angle, - expand=expand, - center=center, - ) - - -@register_kernel_info_from_sample_inputs_fn -def rotate_mask(): - for mask, angle, expand, center in itertools.product( - make_masks(), - [-87, 15, 90], # angle - [True, False], # expand - [None, [12, 23]], # center - ): - if center is not None and expand: - # Skip warning: The provided center argument is ignored if expand is True - continue - - yield ArgsKwargs( - mask, - angle=angle, - expand=expand, - center=center, - ) - - -@register_kernel_info_from_sample_inputs_fn -def crop_image_tensor(): - for image, top, left, height, width in itertools.product(make_images(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20]): - yield ArgsKwargs( - image, - top=top, - left=left, - height=height, - width=width, - ) - - -@register_kernel_info_from_sample_inputs_fn -def crop_bounding_box(): - for bounding_box, top, left in itertools.product(make_bounding_boxes(), [-8, 0, 9], [-8, 0, 9]): - yield ArgsKwargs( - bounding_box, - format=bounding_box.format, - top=top, - left=left, - ) - - -@register_kernel_info_from_sample_inputs_fn -def crop_mask(): - for mask, top, left, height, width in itertools.product(make_masks(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20]): - yield ArgsKwargs( - mask, - top=top, - left=left, - height=height, - width=width, - ) - - -@register_kernel_info_from_sample_inputs_fn -def resized_crop_image_tensor(): - for mask, top, left, height, width, size, antialias in itertools.product( - make_images(), - [-8, 9], - [-8, 9], - [12], - [12], - [(16, 18)], - [True, False], - ): - yield ArgsKwargs(mask, top=top, left=left, height=height, width=width, size=size, antialias=antialias) - - -@register_kernel_info_from_sample_inputs_fn -def resized_crop_bounding_box(): - for bounding_box, top, left, height, width, size in itertools.product( - make_bounding_boxes(), [-8, 9], [-8, 9], [32, 22], [34, 20], [(32, 32), (16, 18)] - ): - yield ArgsKwargs( - bounding_box, format=bounding_box.format, top=top, left=left, height=height, width=width, size=size - ) - - -@register_kernel_info_from_sample_inputs_fn -def resized_crop_mask(): - for mask, top, left, height, width, size in itertools.product( - make_masks(), [-8, 0, 9], [-8, 0, 9], [12, 20], [12, 20], [(32, 32), (16, 18)] - ): - yield ArgsKwargs(mask, top=top, left=left, height=height, width=width, size=size) - - 
-@register_kernel_info_from_sample_inputs_fn -def pad_image_tensor(): - for image, padding, fill, padding_mode in itertools.product( - make_images(), - [[1], [1, 1], [1, 1, 2, 2]], # padding - [None, 128.0, 128, [12.0], [12.0, 13.0, 14.0]], # fill - ["constant", "symmetric", "edge", "reflect"], # padding mode, - ): - if padding_mode != "constant" and fill is not None: - # ValueError: Padding mode 'reflect' is not supported if fill is not scalar - continue - - if isinstance(fill, list) and len(fill) != image.shape[-3]: - continue - - yield ArgsKwargs(image, padding=padding, fill=fill, padding_mode=padding_mode) - - -@register_kernel_info_from_sample_inputs_fn -def pad_mask(): - for mask, padding, padding_mode in itertools.product( - make_masks(), - [[1], [1, 1], [1, 1, 2, 2]], # padding - ["constant", "symmetric", "edge", "reflect"], # padding mode, - ): - yield ArgsKwargs(mask, padding=padding, padding_mode=padding_mode) - - -@register_kernel_info_from_sample_inputs_fn -def pad_bounding_box(): - for bounding_box, padding in itertools.product( - make_bounding_boxes(), - [[1], [1, 1], [1, 1, 2, 2]], - ): - yield ArgsKwargs(bounding_box, padding=padding, format=bounding_box.format) - - -@register_kernel_info_from_sample_inputs_fn -def perspective_image_tensor(): - for image, perspective_coeffs, fill in itertools.product( - make_images(extra_dims=((), (4,))), - [ - [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018], - [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063], - ], - [None, 128.0, 128, [12.0], [1.0, 2.0, 3.0]], # fill - ): - if isinstance(fill, list) and len(fill) == 3 and image.shape[1] != 3: - # skip the test with non-broadcastable fill value - continue - - yield ArgsKwargs(image, perspective_coeffs=perspective_coeffs, fill=fill) - - -@register_kernel_info_from_sample_inputs_fn -def perspective_bounding_box(): - for bounding_box, perspective_coeffs in itertools.product( - make_bounding_boxes(), - [ - [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018], - [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063], - ], - ): - yield ArgsKwargs( - bounding_box, - format=bounding_box.format, - perspective_coeffs=perspective_coeffs, - ) - - -@register_kernel_info_from_sample_inputs_fn -def perspective_mask(): - for mask, perspective_coeffs in itertools.product( - make_masks(extra_dims=((), (4,))), - [ - [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018], - [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063], - ], - ): - yield ArgsKwargs( - mask, - perspective_coeffs=perspective_coeffs, - ) - - -@register_kernel_info_from_sample_inputs_fn -def elastic_image_tensor(): - for image, fill in itertools.product( - make_images(extra_dims=((), (4,))), - [None, 128.0, 128, [12.0], [1.0, 2.0, 3.0]], # fill - ): - if isinstance(fill, list) and len(fill) == 3 and image.shape[1] != 3: - # skip the test with non-broadcastable fill value - continue - - h, w = image.shape[-2:] - displacement = torch.rand(1, h, w, 2) - yield ArgsKwargs(image, displacement=displacement, fill=fill) - - -@register_kernel_info_from_sample_inputs_fn -def elastic_bounding_box(): - for bounding_box in make_bounding_boxes(): - h, w = bounding_box.image_size - displacement = torch.rand(1, h, w, 2) - yield ArgsKwargs( - bounding_box, - format=bounding_box.format, - displacement=displacement, - ) - - -@register_kernel_info_from_sample_inputs_fn -def elastic_mask(): - for mask in make_masks(extra_dims=((), (4,))): - h, w = 
mask.shape[-2:] - displacement = torch.rand(1, h, w, 2) - yield ArgsKwargs( - mask, - displacement=displacement, - ) - - -@register_kernel_info_from_sample_inputs_fn -def center_crop_image_tensor(): - for mask, output_size in itertools.product( - make_images(sizes=((16, 16), (7, 33), (31, 9))), - [[4, 3], [42, 70], [4]], # crop sizes < image sizes, crop_sizes > image sizes, single crop size - ): - yield ArgsKwargs(mask, output_size) - - -@register_kernel_info_from_sample_inputs_fn -def center_crop_bounding_box(): - for bounding_box, output_size in itertools.product(make_bounding_boxes(), [(24, 12), [16, 18], [46, 48], [12]]): - yield ArgsKwargs( - bounding_box, format=bounding_box.format, output_size=output_size, image_size=bounding_box.image_size - ) - - -@register_kernel_info_from_sample_inputs_fn -def center_crop_mask(): - for mask, output_size in itertools.product( - make_masks(sizes=((16, 16), (7, 33), (31, 9))), - [[4, 3], [42, 70], [4]], # crop sizes < image sizes, crop_sizes > image sizes, single crop size - ): - yield ArgsKwargs(mask, output_size) - - -@register_kernel_info_from_sample_inputs_fn -def gaussian_blur_image_tensor(): - for image, kernel_size, sigma in itertools.product( - make_images(extra_dims=((4,),)), - [[3, 3]], - [None, [3.0, 3.0]], - ): - yield ArgsKwargs(image, kernel_size=kernel_size, sigma=sigma) - - -@register_kernel_info_from_sample_inputs_fn -def equalize_image_tensor(): - for image in make_images(extra_dims=(), color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB)): - if image.dtype != torch.uint8: - continue - yield ArgsKwargs(image) - - -@register_kernel_info_from_sample_inputs_fn -def invert_image_tensor(): - for image in make_images(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB)): - yield ArgsKwargs(image) - - -@register_kernel_info_from_sample_inputs_fn -def posterize_image_tensor(): - for image, bits in itertools.product( - make_images(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB)), - [1, 4, 8], - ): - if image.dtype != torch.uint8: - continue - yield ArgsKwargs(image, bits=bits) - - -@register_kernel_info_from_sample_inputs_fn -def solarize_image_tensor(): - for image, threshold in itertools.product( - make_images(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB)), - [0.1, 0.5, 127.0], - ): - if image.is_floating_point() and threshold > 1.0: - continue - yield ArgsKwargs(image, threshold=threshold) - - -@register_kernel_info_from_sample_inputs_fn -def autocontrast_image_tensor(): - for image in make_images(color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB)): - yield ArgsKwargs(image) - - -@register_kernel_info_from_sample_inputs_fn -def adjust_sharpness_image_tensor(): - for image, sharpness_factor in itertools.product( - make_images(extra_dims=((4,),), color_spaces=(features.ColorSpace.GRAY, features.ColorSpace.RGB)), - [0.1, 0.5], - ): - yield ArgsKwargs(image, sharpness_factor=sharpness_factor) - - -@register_kernel_info_from_sample_inputs_fn -def erase_image_tensor(): - for image in make_images(): - c = image.shape[-3] - yield ArgsKwargs(image, i=1, j=2, h=6, w=7, v=torch.rand(c, 6, 7)) - - _KERNEL_TYPES = {"_image_tensor", "_image_pil", "_mask", "_bounding_box", "_label"} @@ -787,80 +420,6 @@ def test_correctness_rotate_bounding_box_on_fixed_input(device, expand): torch.testing.assert_close(output_boxes.tolist(), expected_bboxes) -@pytest.mark.parametrize("angle", range(-89, 90, 37)) -@pytest.mark.parametrize("expand, center", [(True, None), (False, None), (False, 
(12, 14))]) -def test_correctness_rotate_mask(angle, expand, center): - def _compute_expected_mask(mask, angle_, expand_, center_): - assert mask.ndim == 3 - c, *image_size = mask.shape - affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0, [0.0, 0.0], center_) - inv_affine_matrix = np.linalg.inv(affine_matrix) - - if expand_: - # Pillow implementation on how to perform expand: - # https://github.com/python-pillow/Pillow/blob/11de3318867e4398057373ee9f12dcb33db7335c/src/PIL/Image.py#L2054-L2069 - height, width = image_size - points = np.array( - [ - [0.0, 0.0, 1.0], - [0.0, 1.0 * height, 1.0], - [1.0 * width, 1.0 * height, 1.0], - [1.0 * width, 0.0, 1.0], - ] - ) - new_points = points @ inv_affine_matrix.T - min_vals = np.min(new_points, axis=0)[:2] - max_vals = np.max(new_points, axis=0)[:2] - cmax = np.ceil(np.trunc(max_vals * 1e4) * 1e-4) - cmin = np.floor(np.trunc((min_vals + 1e-8) * 1e4) * 1e-4) - new_width, new_height = (cmax - cmin).astype("int").tolist() - tr = np.array([-(new_width - width) / 2.0, -(new_height - height) / 2.0, 1.0]) @ inv_affine_matrix.T - - inv_affine_matrix[:2, 2] = tr[:2] - image_size = [new_height, new_width] - - inv_affine_matrix = inv_affine_matrix[:2, :] - expected_mask = torch.zeros(c, *image_size, dtype=mask.dtype) - - for out_y in range(expected_mask.shape[1]): - for out_x in range(expected_mask.shape[2]): - output_pt = np.array([out_x + 0.5, out_y + 0.5, 1.0]) - input_pt = np.floor(np.dot(inv_affine_matrix, output_pt)).astype("int") - in_x, in_y = input_pt[:2] - if 0 <= in_x < mask.shape[2] and 0 <= in_y < mask.shape[1]: - for i in range(expected_mask.shape[0]): - expected_mask[i, out_y, out_x] = mask[i, in_y, in_x] - return expected_mask.to(mask.device) - - # FIXME: `_compute_expected_mask` currently only works for "detection" masks. Extend it for "segmentation" masks. 
- for mask in make_detection_masks(extra_dims=((), (4,))): - output_mask = F.rotate_mask( - mask, - angle=angle, - expand=expand, - center=center, - ) - - center_ = center - if center_ is None: - center_ = [s * 0.5 for s in mask.shape[-2:][::-1]] - - if mask.ndim < 4: - masks = [mask] - else: - masks = [m for m in mask] - - expected_masks = [] - for mask in masks: - expected_mask = _compute_expected_mask(mask, -angle, expand, center_) - expected_masks.append(expected_mask) - if len(expected_masks) > 1: - expected_masks = torch.stack(expected_masks) - else: - expected_masks = expected_masks[0] - torch.testing.assert_close(output_mask, expected_masks) - - @pytest.mark.parametrize("device", cpu_and_gpu()) def test_correctness_rotate_segmentation_mask_on_fixed_input(device): # Check transformation against known expected output and CPU/CUDA devices @@ -927,47 +486,6 @@ def test_correctness_crop_bounding_box(device, format, top, left, height, width, torch.testing.assert_close(output_boxes.tolist(), expected_bboxes) -@pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize( - "top, left, height, width", - [ - [4, 6, 30, 40], - [-8, 6, 70, 40], - [-8, -6, 70, 8], - ], -) -def test_correctness_crop_mask(device, top, left, height, width): - def _compute_expected_mask(mask, top_, left_, height_, width_): - h, w = mask.shape[-2], mask.shape[-1] - if top_ >= 0 and left_ >= 0 and top_ + height_ < h and left_ + width_ < w: - expected = mask[..., top_ : top_ + height_, left_ : left_ + width_] - else: - # Create output mask - expected_shape = mask.shape[:-2] + (height_, width_) - expected = torch.zeros(expected_shape, device=mask.device, dtype=mask.dtype) - - out_y1 = abs(top_) if top_ < 0 else 0 - out_y2 = h - top_ if top_ + height_ >= h else height_ - out_x1 = abs(left_) if left_ < 0 else 0 - out_x2 = w - left_ if left_ + width_ >= w else width_ - - in_y1 = 0 if top_ < 0 else top_ - in_y2 = h if top_ + height_ >= h else top_ + height_ - in_x1 = 0 if left_ < 0 else left_ - in_x2 = w if left_ + width_ >= w else left_ + width_ - # Paste input mask into output - expected[..., out_y1:out_y2, out_x1:out_x2] = mask[..., in_y1:in_y2, in_x1:in_x2] - - return expected - - for mask in make_masks(): - if mask.device != torch.device(device): - mask = mask.to(device) - output_mask = F.crop_mask(mask, top, left, height, width) - expected_mask = _compute_expected_mask(mask, top, left, height, width) - torch.testing.assert_close(output_mask, expected_mask) - - @pytest.mark.parametrize("device", cpu_and_gpu()) def test_correctness_horizontal_flip_segmentation_mask_on_fixed_input(device): mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) @@ -1038,31 +556,6 @@ def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_): torch.testing.assert_close(output_boxes, expected_bboxes) -@pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize( - "top, left, height, width, size", - [ - [0, 0, 30, 30, (60, 60)], - [5, 5, 35, 45, (32, 34)], - ], -) -def test_correctness_resized_crop_mask(device, top, left, height, width, size): - def _compute_expected_mask(mask, top_, left_, height_, width_, size_): - output = mask.clone() - output = output[:, top_ : top_ + height_, left_ : left_ + width_] - output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest") - output = output[0, :].long() - return output - - in_mask = torch.zeros(1, 100, 100, dtype=torch.long, device=device) - in_mask[0, 10:20, 10:20] = 1 - in_mask[0, 5:15, 12:23] = 2 - - 
expected_mask = _compute_expected_mask(in_mask, top, left, height, width, size) - output_mask = F.resized_crop_mask(in_mask, top, left, height, width, size) - torch.testing.assert_close(output_mask, expected_mask) - - def _parse_padding(padding): if isinstance(padding, int): return [padding] * 4 @@ -1130,63 +623,6 @@ def test_correctness_pad_segmentation_mask_on_fixed_input(device): torch.testing.assert_close(out_mask, expected_mask) -@pytest.mark.parametrize("padding", [[1, 2, 3, 4], [1], 1, [1, 2]]) -@pytest.mark.parametrize("padding_mode", ["constant", "edge", "reflect", "symmetric"]) -def test_correctness_pad_mask(padding, padding_mode): - def _compute_expected_mask(mask, padding_, padding_mode_): - h, w = mask.shape[-2], mask.shape[-1] - pad_left, pad_up, pad_right, pad_down = _parse_padding(padding_) - - if any(pad <= 0 for pad in [pad_left, pad_up, pad_right, pad_down]): - raise pytest.UsageError( - "Expected output can be computed on positive pad values only, " - "but F.pad_* can also crop for negative values" - ) - - new_h = h + pad_up + pad_down - new_w = w + pad_left + pad_right - - new_shape = (*mask.shape[:-2], new_h, new_w) if len(mask.shape) > 2 else (new_h, new_w) - output = torch.zeros(new_shape, dtype=mask.dtype) - output[..., pad_up:-pad_down, pad_left:-pad_right] = mask - - if padding_mode_ == "edge": - # pad top-left corner, left vertical block, bottom-left corner - output[..., :pad_up, :pad_left] = mask[..., 0, 0].unsqueeze(-1).unsqueeze(-2) - output[..., pad_up:-pad_down, :pad_left] = mask[..., :, 0].unsqueeze(-1) - output[..., -pad_down:, :pad_left] = mask[..., -1, 0].unsqueeze(-1).unsqueeze(-2) - # pad top-right corner, right vertical block, bottom-right corner - output[..., :pad_up, -pad_right:] = mask[..., 0, -1].unsqueeze(-1).unsqueeze(-2) - output[..., pad_up:-pad_down, -pad_right:] = mask[..., :, -1].unsqueeze(-1) - output[..., -pad_down:, -pad_right:] = mask[..., -1, -1].unsqueeze(-1).unsqueeze(-2) - # pad top and bottom horizontal blocks - output[..., :pad_up, pad_left:-pad_right] = mask[..., 0, :].unsqueeze(-2) - output[..., -pad_down:, pad_left:-pad_right] = mask[..., -1, :].unsqueeze(-2) - elif padding_mode_ in ("reflect", "symmetric"): - d1 = 1 if padding_mode_ == "reflect" else 0 - d2 = -1 if padding_mode_ == "reflect" else None - both = (-1, -2) - # pad top-left corner, left vertical block, bottom-left corner - output[..., :pad_up, :pad_left] = mask[..., d1 : pad_up + d1, d1 : pad_left + d1].flip(both) - output[..., pad_up:-pad_down, :pad_left] = mask[..., :, d1 : pad_left + d1].flip(-1) - output[..., -pad_down:, :pad_left] = mask[..., -pad_down - d1 : d2, d1 : pad_left + d1].flip(both) - # pad top-right corner, right vertical block, bottom-right corner - output[..., :pad_up, -pad_right:] = mask[..., d1 : pad_up + d1, -pad_right - d1 : d2].flip(both) - output[..., pad_up:-pad_down, -pad_right:] = mask[..., :, -pad_right - d1 : d2].flip(-1) - output[..., -pad_down:, -pad_right:] = mask[..., -pad_down - d1 : d2, -pad_right - d1 : d2].flip(both) - # pad top and bottom horizontal blocks - output[..., :pad_up, pad_left:-pad_right] = mask[..., d1 : pad_up + d1, :].flip(-2) - output[..., -pad_down:, pad_left:-pad_right] = mask[..., -pad_down - d1 : d2, :].flip(-2) - - return output - - for mask in make_masks(): - out_mask = F.pad_mask(mask, padding, padding_mode=padding_mode) - - expected_mask = _compute_expected_mask(mask, padding, padding_mode) - torch.testing.assert_close(out_mask, expected_mask) - - @pytest.mark.parametrize("device", cpu_and_gpu()) 
@pytest.mark.parametrize( "startpoints, endpoints", @@ -1272,64 +708,6 @@ def _compute_expected_bbox(bbox, pcoeffs_): torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=0, atol=1) -@pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize( - "startpoints, endpoints", - [ - # FIXME: this configuration leads to a difference in a single pixel - # [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]], - [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]], - [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]], - ], -) -def test_correctness_perspective_mask(device, startpoints, endpoints): - def _compute_expected_mask(mask, pcoeffs_): - assert mask.ndim == 3 - m1 = np.array([[pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]], [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]]]) - m2 = np.array([[pcoeffs_[6], pcoeffs_[7], 1.0], [pcoeffs_[6], pcoeffs_[7], 1.0]]) - - expected_mask = torch.zeros_like(mask.cpu()) - for out_y in range(expected_mask.shape[1]): - for out_x in range(expected_mask.shape[2]): - output_pt = np.array([out_x + 0.5, out_y + 0.5, 1.0]) - - numer = np.matmul(output_pt, m1.T) - denom = np.matmul(output_pt, m2.T) - input_pt = np.floor(numer / denom).astype(np.int32) - - in_x, in_y = input_pt[:2] - if 0 <= in_x < mask.shape[2] and 0 <= in_y < mask.shape[1]: - for i in range(expected_mask.shape[0]): - expected_mask[i, out_y, out_x] = mask[i, in_y, in_x] - return expected_mask.to(mask.device) - - pcoeffs = _get_perspective_coeffs(startpoints, endpoints) - - # FIXME: `_compute_expected_mask` currently only works for "detection" masks. Extend it for "segmentation" masks. - for mask in make_detection_masks(extra_dims=((), (4,))): - mask = mask.to(device) - - output_mask = F.perspective_mask( - mask, - perspective_coeffs=pcoeffs, - ) - - if mask.ndim < 4: - masks = [mask] - else: - masks = [m for m in mask] - - expected_masks = [] - for mask in masks: - expected_mask = _compute_expected_mask(mask, pcoeffs) - expected_masks.append(expected_mask) - if len(expected_masks) > 1: - expected_masks = torch.stack(expected_masks) - else: - expected_masks = expected_masks[0] - torch.testing.assert_close(output_mask, expected_masks) - - @pytest.mark.parametrize("device", cpu_and_gpu()) @pytest.mark.parametrize( "output_size", @@ -1463,53 +841,6 @@ def test_correctness_gaussian_blur_image_tensor(device, image_size, dt, ksize, s torch.testing.assert_close(out, true_out, rtol=0.0, atol=1.0, msg=f"{ksize}, {sigma}") -@pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize( - "fn, make_samples", - [ - (F.elastic_image_tensor, make_images), - # FIXME: This test currently only works for "detection" masks. Extend it for "segmentation" masks. 
- (F.elastic_mask, make_detection_masks), - ], -) -def test_correctness_elastic_image_or_mask_tensor(device, fn, make_samples): - in_box = [10, 15, 25, 35] - for sample in make_samples(sizes=((64, 76),), extra_dims=((), (4,))): - c, h, w = sample.shape[-3:] - # Setup a dummy image with 4 points - sample[..., in_box[1], in_box[0]] = torch.arange(10, 10 + c) - sample[..., in_box[3] - 1, in_box[0]] = torch.arange(20, 20 + c) - sample[..., in_box[3] - 1, in_box[2] - 1] = torch.arange(30, 30 + c) - sample[..., in_box[1], in_box[2] - 1] = torch.arange(40, 40 + c) - sample = sample.to(device) - - if fn == F.elastic_image_tensor: - sample = features.Image(sample) - kwargs = {"interpolation": F.InterpolationMode.NEAREST} - else: - sample = features.Mask(sample) - kwargs = {} - - # Create a displacement grid using sin - n, m = 5.0, 0.1 - d1 = m * torch.sin(torch.arange(h, dtype=torch.float) * torch.pi * n / h) - d2 = m * torch.sin(torch.arange(w, dtype=torch.float) * torch.pi * n / w) - - d1 = d1[:, None].expand((h, w)) - d2 = d2[None, :].expand((h, w)) - - displacement = torch.cat([d1[..., None], d2[..., None]], dim=-1) - displacement = displacement.reshape(1, h, w, 2) - - output = fn(sample, displacement=displacement, **kwargs) - - # Check places where transformed points should be - torch.testing.assert_close(output[..., 12, 9], sample[..., in_box[1], in_box[0]]) - torch.testing.assert_close(output[..., 17, 27], sample[..., in_box[1], in_box[2] - 1]) - torch.testing.assert_close(output[..., 31, 6], sample[..., in_box[3] - 1, in_box[0]]) - torch.testing.assert_close(output[..., 37, 23], sample[..., in_box[3] - 1, in_box[2] - 1]) - - def test_midlevel_normalize_output_type(): inpt = torch.rand(1, 3, 32, 32) output = F.normalize(inpt, mean=[0.5, 0.5, 0.5], std=[1.0, 1.0, 1.0]) diff --git a/test/test_prototype_transforms_kernels.py b/test/test_prototype_transforms_kernels.py index 4ad17272059..b4d048c34a1 100644 --- a/test/test_prototype_transforms_kernels.py +++ b/test/test_prototype_transforms_kernels.py @@ -35,42 +35,10 @@ def test_coverage(): "adjust_gamma_image_tensor", "adjust_hue_image_tensor", "adjust_saturation_image_tensor", - "adjust_sharpness_image_tensor", - "autocontrast_image_tensor", - "center_crop_bounding_box", - "center_crop_image_tensor", - "center_crop_mask", "clamp_bounding_box", - "crop_bounding_box", - "crop_image_tensor", - "crop_mask", - "elastic_bounding_box", - "elastic_image_tensor", - "elastic_mask", - "equalize_image_tensor", - "erase_image_tensor", "five_crop_image_tensor", - "gaussian_blur_image_tensor", - "invert_image_tensor", "normalize_image_tensor", - "pad_bounding_box", - "pad_image_tensor", - "pad_mask", - "perspective_bounding_box", - "perspective_image_tensor", - "perspective_mask", - "posterize_image_tensor", - "resized_crop_bounding_box", - "resized_crop_image_tensor", - "resized_crop_mask", - "rotate_bounding_box", - "rotate_image_tensor", - "rotate_mask", - "solarize_image_tensor", "ten_crop_image_tensor", - "vertical_flip_bounding_box", - "vertical_flip_image_tensor", - "vertical_flip_mask", } } @@ -191,7 +159,7 @@ def test_dtype_and_device_consistency(self, info, args_kwargs, device): output = info.kernel(input, *other_args, **kwargs) assert output.dtype == input.dtype - assert output.device == torch.device(device) + assert output.device == input.device @pytest.mark.parametrize( ("info", "args_kwargs"),
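
The new `KernelInfo` entries added above replace the decorated per-kernel sample-input functions that this patch deletes from `test_prototype_transforms_functional.py`. Below is a minimal sketch of that pattern with simplified stand-ins for the real `KernelInfo` and `ArgsKwargs` helpers; the actual classes live in the test utilities and carry more fields (`kernel_name`, `reference_inputs_fn`, `closeness_kwargs`), so treat this as an illustration of the shape of the design, not the suite's implementation:

```python
import dataclasses
from typing import Callable, Iterator, Optional

import torch


class ArgsKwargs:
    """Bundles positional and keyword arguments for a deferred kernel call."""

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs


@dataclasses.dataclass
class KernelInfo:
    """Declarative description of a kernel under test (simplified stand-in)."""

    kernel: Callable
    sample_inputs_fn: Callable[[], Iterator[ArgsKwargs]]
    reference_fn: Optional[Callable] = None


def sample_inputs_vertical_flip_image_tensor():
    # Yield a handful of representative inputs instead of writing a
    # dedicated, decorated test function per kernel.
    for shape in [(3, 16, 16), (1, 7, 33)]:
        yield ArgsKwargs(torch.rand(shape))


VERTICAL_FLIP_INFO = KernelInfo(
    kernel=lambda image: image.flip(-2),  # flip along the height axis
    sample_inputs_fn=sample_inputs_vertical_flip_image_tensor,
)

# A single parametrized test can then exercise every registered info:
for args_kwargs in VERTICAL_FLIP_INFO.sample_inputs_fn():
    output = VERTICAL_FLIP_INFO.kernel(*args_kwargs.args, **args_kwargs.kwargs)
    assert output.shape == args_kwargs.args[0].shape
```

Because one generic, parametrized test iterates all infos, the per-kernel correctness scaffolding removed further down in this patch becomes redundant rather than lost coverage.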
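
`combinations_grid` (imported from `datasets_utils`) is used throughout the new sample-input functions to expand keyword lists into a full parameter grid. A sketch of the assumed behavior, mirroring `itertools.product` over the dict values:

```python
import itertools


def combinations_grid(**kwargs):
    """Cartesian product over the value lists, one dict per combination."""
    return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())]


grid = combinations_grid(expand=[True, False], center=[None, (0, 0)])
assert len(grid) == 4
assert grid[0] == {"expand": True, "center": None}
```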
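
Several infos pair the tensor kernel with a PIL reference via `pil_reference_wrapper` and compare results under `DEFAULT_IMAGE_CLOSENESS_KWARGS`. Roughly, the wrapper converts the tensor input to a PIL image before invoking the PIL kernel; the version below is a simplified assumption of that behavior (the suite's helper additionally guards against batched inputs), with a left-right flip standing in for a real kernel pair:

```python
import torch
import torchvision.transforms.functional as TF


def pil_reference_wrapper(pil_kernel):
    def wrapper(image_tensor, *args, **kwargs):
        # Convert the tensor input to PIL, run the PIL kernel, convert back.
        pil_image = TF.to_pil_image(image_tensor)
        return TF.pil_to_tensor(pil_kernel(pil_image, *args, **kwargs))

    return wrapper


# PIL's transpose(0) is Image.Transpose.FLIP_LEFT_RIGHT; flip(-1) is the
# tensor counterpart, so the wrapped reference should agree with it.
reference_fn = pil_reference_wrapper(lambda pil_image: pil_image.transpose(0))
image = torch.randint(0, 256, (3, 8, 8), dtype=torch.uint8)
torch.testing.assert_close(reference_fn(image), image.flip(-1), rtol=0, atol=1)
```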
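
Finally, the new `from torchvision.transforms.functional_tensor import _max_value as get_max_value` import exists so that `_get_solarize_thresholds` can scale thresholds to the image dtype: a threshold of 0.5 only makes sense for float images in [0, 1], while uint8 images need roughly 127. A self-contained re-implementation of that logic, where the `max_value` helper is an illustrative stand-in for the private `_max_value`:

```python
import torch


def max_value(dtype: torch.dtype) -> float:
    # Float images live in [0, 1]; integer images span the dtype's range.
    return 1.0 if dtype.is_floating_point else float(torch.iinfo(dtype).max)


def get_solarize_thresholds(dtype: torch.dtype):
    for factor in [0.1, 0.5]:
        yield (float if dtype.is_floating_point else int)(max_value(dtype) * factor)


assert list(get_solarize_thresholds(torch.float32)) == [0.1, 0.5]
assert list(get_solarize_thresholds(torch.uint8)) == [25, 127]
```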