Skip to content

Commit 64fab11

Browse files
Yosua Michael Maranatha authored and
facebook-github-bot committed
[fbsync] Avoid recomputing the affine matrix in bbox rotate (#6712)
Summary: * Avoid recomputing the affine matrix in bbox rotate * Fix linter * Use inverted=True for estimating image size * Update the image size estimation to match the one from the image kernel * Nits * Address comments * Use center=[0, 0] when expand=True. Reviewed By: NicolasHug. Differential Revision: D40427463. fbshipit-source-id: 87e147e8a52c18d90601c6de4b1b182c60b9d8e9
1 parent b13b467 commit 64fab11

File tree

1 file changed

+26
-25
lines changed

1 file changed

+26
-25
lines changed

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 26 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -279,9 +279,9 @@ def affine_image_tensor(
279279
center_f = [0.0, 0.0]
280280
if center is not None:
281281
# Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
282-
center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
282+
center_f = [(c - s * 0.5) for c, s in zip(center, [width, height])]
283283

284-
translate_f = [1.0 * t for t in translate]
284+
translate_f = [float(t) for t in translate]
285285
matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear)
286286

287287
output = _FT.affine(image, matrix, interpolation=interpolation.value, fill=fill)
@@ -321,7 +321,7 @@ def _affine_bounding_box_xyxy(
321321
shear: List[float],
322322
center: Optional[List[float]] = None,
323323
expand: bool = False,
324-
) -> torch.Tensor:
324+
) -> Tuple[torch.Tensor, Tuple[int, int]]:
325325
angle, translate, shear, center = _affine_parse_args(
326326
angle, translate, scale, shear, InterpolationMode.NEAREST, center
327327
)
@@ -333,19 +333,24 @@ def _affine_bounding_box_xyxy(
333333
dtype = bounding_box.dtype if torch.is_floating_point(bounding_box) else torch.float32
334334
device = bounding_box.device
335335

336-
affine_matrix = torch.tensor(
337-
_get_inverse_affine_matrix(center, angle, translate, scale, shear, inverted=False),
338-
dtype=dtype,
339-
device=device,
340-
).view(2, 3)
336+
affine_vector = _get_inverse_affine_matrix(center, angle, translate, scale, shear, inverted=False)
337+
transposed_affine_matrix = (
338+
torch.tensor(
339+
affine_vector,
340+
dtype=dtype,
341+
device=device,
342+
)
343+
.view(2, 3)
344+
.T
345+
)
341346
# 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
342347
# Tensor of points has shape (N * 4, 3), where N is the number of bboxes
343348
# Single point structure is similar to
344349
# [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
345350
points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
346351
points = torch.cat([points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1)
347352
# 2) Now let's transform the points using affine matrix
348-
transformed_points = torch.matmul(points, affine_matrix.T)
353+
transformed_points = torch.matmul(points, transposed_affine_matrix)
349354
# 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
350355
# and compute bounding box from 4 transformed points:
351356
transformed_points = transformed_points.view(-1, 4, 2)
@@ -360,20 +365,24 @@ def _affine_bounding_box_xyxy(
360365
points = torch.tensor(
361366
[
362367
[0.0, 0.0, 1.0],
363-
[0.0, 1.0 * height, 1.0],
364-
[1.0 * width, 1.0 * height, 1.0],
365-
[1.0 * width, 0.0, 1.0],
368+
[0.0, float(height), 1.0],
369+
[float(width), float(height), 1.0],
370+
[float(width), 0.0, 1.0],
366371
],
367372
dtype=dtype,
368373
device=device,
369374
)
370-
new_points = torch.matmul(points, affine_matrix.T)
375+
new_points = torch.matmul(points, transposed_affine_matrix)
371376
tr, _ = torch.min(new_points, dim=0, keepdim=True)
372377
# Translate bounding boxes
373378
out_bboxes[:, 0::2] = out_bboxes[:, 0::2] - tr[:, 0]
374379
out_bboxes[:, 1::2] = out_bboxes[:, 1::2] - tr[:, 1]
380+
# Estimate meta-data for image with inverted=True and with center=[0,0]
381+
affine_vector = _get_inverse_affine_matrix([0.0, 0.0], angle, translate, scale, shear)
382+
new_width, new_height = _FT._compute_affine_output_size(affine_vector, width, height)
383+
image_size = (new_height, new_width)
375384

376-
return out_bboxes.to(bounding_box.dtype)
385+
return out_bboxes.to(bounding_box.dtype), image_size
377386

378387

379388
def affine_bounding_box(
@@ -391,7 +400,7 @@ def affine_bounding_box(
391400
bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
392401
).view(-1, 4)
393402

394-
out_bboxes = _affine_bounding_box_xyxy(bounding_box, image_size, angle, translate, scale, shear, center)
403+
out_bboxes, _ = _affine_bounding_box_xyxy(bounding_box, image_size, angle, translate, scale, shear, center)
395404

396405
# out_bboxes should be of shape [N boxes, 4]
397406

@@ -502,7 +511,7 @@ def rotate_image_tensor(
502511
warnings.warn("The provided center argument has no effect on the result if expand is True")
503512
else:
504513
# Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
505-
center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
514+
center_f = [(c - s * 0.5) for c, s in zip(center, [width, height])]
506515

507516
# due to current incoherence of rotation angle direction between affine and rotate implementations
508517
# we need to set -angle.
@@ -558,7 +567,7 @@ def rotate_bounding_box(
558567
bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
559568
).view(-1, 4)
560569

561-
out_bboxes = _affine_bounding_box_xyxy(
570+
out_bboxes, image_size = _affine_bounding_box_xyxy(
562571
bounding_box,
563572
image_size,
564573
angle=-angle,
@@ -569,14 +578,6 @@ def rotate_bounding_box(
569578
expand=expand,
570579
)
571580

572-
if expand:
573-
# TODO: Move this computation inside of `_affine_bounding_box_xyxy` to avoid computing the rotation and points
574-
# matrix twice
575-
height, width = image_size
576-
rotation_matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, [0.0, 0.0], 1.0, [0.0, 0.0])
577-
new_width, new_height = _FT._compute_affine_output_size(rotation_matrix, width, height)
578-
image_size = (new_height, new_width)
579-
580581
return (
581582
convert_format_bounding_box(
582583
out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False

0 commit comments

Comments
 (0)