Skip to content

[proto] Added RandomPerspective and tests #6284

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 90 additions & 18 deletions test/test_prototype_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ def test__transform(self, padding, fill, padding_mode, mocker):
transform = transforms.Pad(padding, fill=fill, padding_mode=padding_mode)

fn = mocker.patch("torchvision.prototype.transforms.functional.pad")
inpt = mocker.MagicMock(spec=torch.Tensor)
inpt = mocker.MagicMock(spec=features.Image)
_ = transform(inpt)

fn.assert_called_once_with(inpt, padding=padding, fill=fill, padding_mode=padding_mode)
Expand All @@ -369,11 +369,12 @@ def test_assertions(self):

@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
def test__get_params(self, fill, side_range):
def test__get_params(self, fill, side_range, mocker):
transform = transforms.RandomZoomOut(fill=fill, side_range=side_range)

image = features.Image(torch.rand(1, 3, 32, 32))
c, h, w = image.shape[-3:]
image = mocker.MagicMock(spec=features.Image)
c = image.num_channels = 3
h, w = image.image_size = (24, 32)

params = transform._get_params(image)

Expand All @@ -387,19 +388,22 @@ def test__get_params(self, fill, side_range):
@pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
@pytest.mark.parametrize("side_range", [(1.0, 4.0), [2.0, 5.0]])
def test__transform(self, fill, side_range, mocker):
image = features.Image(torch.rand(1, 3, 32, 32))
inpt = mocker.MagicMock(spec=features.Image)
inpt.num_channels = 3
inpt.image_size = (24, 32)

transform = transforms.RandomZoomOut(fill=fill, side_range=side_range, p=1)

fn = mocker.patch("torchvision.prototype.transforms.functional.pad")
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could be also helpful for users
torch.manual_seed(12)
_ = transform(image)
_ = transform(inpt)
torch.manual_seed(12)
torch.rand(1) # random apply changes random state
params = transform._get_params(image)
params = transform._get_params(inpt)

fn.assert_called_once_with(image, **params)
fn.assert_called_once_with(inpt, **params)


class TestRandomRotation:
Expand Down Expand Up @@ -449,7 +453,7 @@ def test__transform(self, degrees, expand, fill, center, mocker):
assert transform.degrees == [float(-degrees), float(degrees)]

fn = mocker.patch("torchvision.prototype.transforms.functional.rotate")
inpt = mocker.MagicMock(spec=torch.Tensor)
inpt = mocker.MagicMock(spec=features.Image)
# vfdev-5, Feature Request: let's store params as Transform attribute
# This could be also helpful for users
torch.manual_seed(12)
Expand Down Expand Up @@ -504,9 +508,11 @@ def test_assertions(self):
@pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
@pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
@pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
def test__get_params(self, degrees, translate, scale, shear):
image = features.Image(torch.rand(1, 3, 32, 32))
h, w = image.shape[-2:]
def test__get_params(self, degrees, translate, scale, shear, mocker):
image = mocker.MagicMock(spec=features.Image)
image.num_channels = 3
image.image_size = (24, 32)
h, w = image.image_size

transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear)
params = transform._get_params(image)
Expand Down Expand Up @@ -564,7 +570,10 @@ def test__transform(self, degrees, translate, scale, shear, fill, center, mocker
assert transform.degrees == [float(-degrees), float(degrees)]

fn = mocker.patch("torchvision.prototype.transforms.functional.affine")
inpt = features.Image(torch.rand(1, 3, 32, 32))
inpt = mocker.MagicMock(spec=features.Image)
inpt.num_channels = 3
inpt.image_size = (24, 32)

# vfdev-5, Feature Request: let's store params as Transform attribute
# This could be also helpful for users
torch.manual_seed(12)
Expand Down Expand Up @@ -592,9 +601,11 @@ def test_assertions(self):
with pytest.raises(ValueError, match="Padding mode should be either"):
transforms.RandomCrop([10, 12], padding=1, padding_mode="abc")

def test__get_params(self):
image = features.Image(torch.rand(1, 3, 32, 32))
h, w = image.shape[-2:]
def test__get_params(self, mocker):
image = mocker.MagicMock(spec=features.Image)
image.num_channels = 3
image.image_size = (24, 32)
h, w = image.image_size

transform = transforms.RandomCrop([10, 10])
params = transform._get_params(image)
Expand All @@ -614,7 +625,10 @@ def test_forward(self, padding, pad_if_needed, fill, padding_mode, mocker):
output_size, padding=padding, pad_if_needed=pad_if_needed, fill=fill, padding_mode=padding_mode
)

inpt = features.Image(torch.rand(1, 3, 32, 32))
inpt = mocker.MagicMock(spec=features.Image)
inpt.num_channels = 3
inpt.image_size = (32, 32)

expected = mocker.MagicMock(spec=features.Image)
expected.num_channels = 3
if isinstance(padding, int):
Expand Down Expand Up @@ -696,7 +710,10 @@ def test__transform(self, kernel_size, sigma, mocker):
assert transform.sigma == (sigma, sigma)

fn = mocker.patch("torchvision.prototype.transforms.functional.gaussian_blur")
inpt = features.Image(torch.rand(1, 3, 32, 32))
inpt = mocker.MagicMock(spec=features.Image)
inpt.num_channels = 3
inpt.image_size = (24, 32)

# vfdev-5, Feature Request: let's store params as Transform attribute
# This could be also helpful for users
torch.manual_seed(12)
Expand Down Expand Up @@ -730,3 +747,58 @@ def test__transform(self, p, transform_cls, func_op_name, kwargs, mocker):
fn.assert_called_once_with(inpt, **kwargs)
else:
fn.call_count == 0


class TestRandomPerspective:
    """Unit tests for ``transforms.RandomPerspective``."""

    def test_assertions(self):
        """Invalid constructor arguments raise with a descriptive message."""
        with pytest.raises(ValueError, match="Argument distortion_scale value should be between 0 and 1"):
            transforms.RandomPerspective(distortion_scale=-1.0)

        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
            transforms.RandomPerspective(0.5, fill="abc")

    def test__get_params(self, mocker):
        """Sampled start/end points stay within the expected corner regions."""
        dscale = 0.5
        transform = transforms.RandomPerspective(dscale)
        image = mocker.MagicMock(spec=features.Image)
        image.num_channels = 3
        image.image_size = (24, 32)

        params = transform._get_params(image)

        h, w = image.image_size
        # Largest per-axis displacement of a corner. Written the same way as
        # in RandomPerspective._get_params (scale applied to the *halved*
        # size) so that the test and the implementation cannot drift apart.
        max_dy = int(dscale * (h // 2))
        max_dx = int(dscale * (w // 2))

        # Start points are exactly the four image corners.
        assert len(params["startpoints"]) == 4
        for x, y in params["startpoints"]:
            assert x in (0, w - 1)
            assert y in (0, h - 1)

        # End points are listed as top-left, top-right, bottom-right,
        # bottom-left; each letter of the tag selects the bound to check.
        assert len(params["endpoints"]) == 4
        for (x, y), name in zip(params["endpoints"], ["tl", "tr", "br", "bl"]):
            if "t" in name:
                assert 0 <= y <= max_dy, (x, y, name)
            if "b" in name:
                assert h - max_dy - 1 <= y <= h, (x, y, name)
            if "l" in name:
                assert 0 <= x <= max_dx, (x, y, name)
            if "r" in name:
                assert w - max_dx - 1 <= x <= w, (x, y, name)

    @pytest.mark.parametrize("distortion_scale", [0.1, 0.7])
    def test__transform(self, distortion_scale, mocker):
        """The transform forwards the sampled params, fill and interpolation to ``F.perspective``."""
        interpolation = InterpolationMode.BILINEAR
        fill = 12
        # p=1 guarantees the transform is applied. With the default p=0.5 the
        # assertion below would only hold if the first random draw after
        # seeding happened to fall below 0.5. This mirrors the RandomZoomOut
        # test, which also passes p=1.
        transform = transforms.RandomPerspective(distortion_scale, fill=fill, interpolation=interpolation, p=1)

        fn = mocker.patch("torchvision.prototype.transforms.functional.perspective")
        inpt = mocker.MagicMock(spec=features.Image)
        inpt.num_channels = 3
        inpt.image_size = (24, 32)
        # vfdev-5, Feature Request: let's store params as Transform attribute
        # This could be also helpful for users
        torch.manual_seed(12)
        _ = transform(inpt)
        # Replay the same random stream to recompute the params the transform
        # sampled internally.
        torch.manual_seed(12)
        torch.rand(1)  # random apply changes random state
        params = transform._get_params(inpt)

        fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)
4 changes: 3 additions & 1 deletion test/test_prototype_transforms_functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,9 +599,11 @@ def test_scriptable(kernel):
and all(
feature_type not in name for feature_type in {"image", "segmentation_mask", "bounding_box", "label", "pil"}
)
and name not in {"to_image_tensor", "InterpolationMode", "decode_video_with_av", "crop", "rotate"}
and name
not in {"to_image_tensor", "InterpolationMode", "decode_video_with_av", "crop", "rotate", "perspective"}
# We skip 'crop' due to missing 'height' and 'width'
# We skip 'rotate' due to non implemented yet expand=True case for bboxes
# We skip 'perspective' as it requires different input args than perspective_image_tensor etc
],
)
def test_functional_mid_level(func):
Expand Down
1 change: 1 addition & 0 deletions torchvision/prototype/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
RandomZoomOut,
RandomRotation,
RandomAffine,
RandomPerspective,
)
from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
from ._misc import Identity, GaussianBlur, Normalize, ToDtype, Lambda
Expand Down
58 changes: 58 additions & 0 deletions torchvision/prototype/transforms/_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
bottom = canvas_height - (top + orig_h)
padding = [left, top, right, bottom]

# vfdev-5: Can we put that into pad_image_tensor ?
fill = self.fill
if not isinstance(fill, collections.abc.Sequence):
fill = [fill] * orig_c
Expand Down Expand Up @@ -493,3 +494,60 @@ def forward(self, *inputs: Any) -> Any:
flat_inputs, spec = tree_flatten(sample)
out_flat_inputs = self._forward(flat_inputs)
return tree_unflatten(out_flat_inputs, spec)


class RandomPerspective(_RandomApplyTransform):
    """Randomly applied perspective transform.

    The four image corners are used as start points and a randomly displaced
    point near each corner is used as the matching end point; both lists are
    handed to ``F.perspective`` together with ``fill`` and ``interpolation``.

    Args:
        distortion_scale: Strength of the distortion, must lie in ``[0, 1]``.
        fill: Fill value forwarded to ``F.perspective``.
        interpolation: Interpolation mode forwarded to ``F.perspective``.
        p: Forwarded to ``_RandomApplyTransform`` (presumably the probability
            of applying the transform -- confirm against the base class).

    Raises:
        ValueError: If ``distortion_scale`` is not between 0 and 1.
    """

    def __init__(
        self,
        distortion_scale: float,
        fill: Union[int, float, Sequence[int], Sequence[float]] = 0,
        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
        p: float = 0.5,
    ) -> None:
        super().__init__(p=p)

        # Validate `fill` first, then the distortion range.
        _check_fill_arg(fill)
        if not 0 <= distortion_scale <= 1:
            raise ValueError("Argument distortion_scale value should be between 0 and 1")

        self.fill = fill
        self.interpolation = interpolation
        self.distortion_scale = distortion_scale

    def _get_params(self, sample: Any) -> Dict[str, Any]:
        """Sample the ``startpoints`` / ``endpoints`` for one perspective warp.

        Returns:
            A dict with ``"startpoints"`` (the four image corners) and
            ``"endpoints"`` (one randomly shifted point per corner), both
            ordered top-left, top-right, bottom-right, bottom-left.
        """
        # TODO: make it work with bboxes and segm masks
        _, height, width = get_image_dimensions(query_image(sample))

        # Maximum corner displacement along each axis.
        max_dx = int(self.distortion_scale * (width // 2))
        max_dy = int(self.distortion_scale * (height // 2))

        def draw(low: int, high: int) -> int:
            # One integer sampled by torch.randint(low, high).
            return int(torch.randint(low, high, size=(1,)).item())

        # NOTE: the order of the draws (x before y, corners in tl/tr/br/bl
        # order) determines the values obtained for a given seed; keep it.
        topleft = [draw(0, max_dx + 1), draw(0, max_dy + 1)]
        topright = [draw(width - max_dx - 1, width), draw(0, max_dy + 1)]
        botright = [draw(width - max_dx - 1, width), draw(height - max_dy - 1, height)]
        botleft = [draw(0, max_dx + 1), draw(height - max_dy - 1, height)]

        right, bottom = width - 1, height - 1
        return {
            "startpoints": [[0, 0], [right, 0], [right, bottom], [0, bottom]],
            "endpoints": [topleft, topright, botright, botleft],
        }

    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
        """Apply ``F.perspective`` to ``inpt`` using the sampled ``params``."""
        return F.perspective(inpt, **params, fill=self.fill, interpolation=self.interpolation)
6 changes: 5 additions & 1 deletion torchvision/prototype/transforms/functional/_geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
_get_inverse_affine_matrix,
InterpolationMode,
_compute_output_size,
_get_perspective_coeffs,
)

from ._meta import convert_bounding_box_format, get_dimensions_image_tensor, get_dimensions_image_pil
Expand Down Expand Up @@ -765,10 +766,13 @@ def perspective_segmentation_mask(img: torch.Tensor, perspective_coeffs: List[fl

def perspective(
inpt: DType,
perspective_coeffs: List[float],
startpoints: List[List[int]],
Copy link
Collaborator Author

@vfdev-5 vfdev-5 Jul 18, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the previous work I missed that the mid-level perspective op has to match F.perspective from the stable API. The mid-level perspective now matches this signature:

def perspective(
img: Tensor,
startpoints: List[List[int]],
endpoints: List[List[int]],
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
fill: Optional[List[float]] = None,
) -> Tensor:

endpoints: List[List[int]],
interpolation: InterpolationMode = InterpolationMode.BILINEAR,
fill: Optional[Union[int, float, Sequence[int], Sequence[float]]] = None,
) -> DType:
perspective_coeffs = _get_perspective_coeffs(startpoints, endpoints)

if isinstance(inpt, features._Feature):
return inpt.perspective(perspective_coeffs, interpolation=interpolation, fill=fill)
elif isinstance(inpt, PIL.Image.Image):
Expand Down