
Commit a24fca7

Updated comments and added another test case
1 parent 234f113 commit a24fca7

File tree

2 files changed (+60 lines, -6 lines)

test/test_prototype_transforms_functional.py

Lines changed: 51 additions & 1 deletion
@@ -317,7 +317,7 @@ def _compute_expected_bbox(bbox, angle_, translate_, scale_, shear_, center_):
                 [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
             ]
         )
-        transformed_points = points @ true_matrix.T
+        transformed_points = np.matmul(points, true_matrix.T)
         out_bbox = [
             np.min(transformed_points[:, 0]),
             np.min(transformed_points[:, 1]),
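The change above is a spelling swap only: for 2-D arrays, points @ true_matrix.T and np.matmul(points, true_matrix.T) compute the same matrix product. A minimal standalone sketch (the rotation matrix and box coordinates here are made up for illustration):

import numpy as np

# Hypothetical 2x3 affine matrix: a 90-degree rotation about the origin.
theta = np.deg2rad(90.0)
true_matrix = np.array(
    [
        [np.cos(theta), -np.sin(theta), 0.0],
        [np.sin(theta), np.cos(theta), 0.0],
    ]
)

# Corners of the box (10, 20, 30, 40) in homogeneous (x, y, 1) form.
points = np.array(
    [
        [10.0, 20.0, 1.0],
        [30.0, 20.0, 1.0],
        [30.0, 40.0, 1.0],
        [10.0, 40.0, 1.0],
    ]
)

# The operator and the function call agree for 2-D inputs.
assert np.allclose(points @ true_matrix.T, np.matmul(points, true_matrix.T))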
@@ -371,3 +371,53 @@ def _compute_expected_bbox(bbox, angle_, translate_, scale_, shear_, center_):
         expected_bboxes = expected_bboxes.squeeze(0)

     torch.testing.assert_close(output_bboxes, expected_bboxes)
+
+
+def test_correctness_affine_bounding_box_on_fixed_input():
+    # Check transformation against known expected output
+    image_size = (64, 64)
+    # xyxy format
+    in_boxes = [
+        [20, 25, 35, 45],
+        [50, 5, 70, 22],
+        [image_size[1] // 2 - 10, image_size[0] // 2 - 10, image_size[1] // 2 + 10, image_size[0] // 2 + 10],
+        [1, 1, 5, 5],
+    ]
+    in_boxes = features.BoundingBox(
+        in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=image_size, dtype=torch.float64
+    )
+    # Tested parameters
+    angle = 63
+    scale = 0.89
+    dx = 0.12
+    dy = 0.23
+
+    # Expected bboxes computed using albumentations:
+    # from albumentations.augmentations.geometric.functional import bbox_shift_scale_rotate
+    # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
+    # expected_bboxes = []
+    # for in_box in in_boxes:
+    #     n_in_box = normalize_bbox(in_box, *image_size)
+    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *image_size)
+    #     out_box = denormalize_bbox(n_out_box, *image_size)
+    #     expected_bboxes.append(out_box)
+    expected_bboxes = [
+        (24.522435977922218, 34.375689508290854, 46.443125279998114, 54.3516575015695),
+        (54.88288587110401, 50.08453280875634, 76.44484547743795, 72.81332520036864),
+        (27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844),
+        (48.56528888843238, 9.611532109828834, 53.35347829361575, 14.39972151501221),
+    ]
+
+    output_boxes = F.affine_bounding_box(
+        in_boxes,
+        in_boxes.format,
+        in_boxes.image_size,
+        angle,
+        (dx * image_size[1], dy * image_size[0]),
+        scale,
+        shear=(0, 0),
+    )
+
+    assert len(output_boxes) == len(expected_bboxes)
+    for a_out_box, out_box in zip(expected_bboxes, output_boxes):
+        np.testing.assert_allclose(out_box.cpu().numpy(), a_out_box)
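The expected_bboxes values were generated offline with the albumentations recipe kept in the comments (the -angle presumably bridges the two libraries' opposite angle conventions). They can also be sanity-checked by hand: the third input box sits exactly at the centre of the 64 x 64 image, so the transform about the image centre plus the (dx, dy) shift should move its centre by exactly (dx * width, dy * height), and its 20 x 20 extent, scaled by 0.89 and rotated by 63 degrees, should have an axis-aligned envelope of side 20 * 0.89 * (cos 63° + sin 63°). A back-of-envelope check, with the numbers copied from the diff above:

import math

# Third expected box (the one that starts centred at (32, 32)).
x1, y1, x2, y2 = 27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844

# Centre moved by exactly (dx * width, dy * height) = (7.68, 14.72).
assert abs((x1 + x2) / 2 - (32 + 0.12 * 64)) < 1e-9
assert abs((y1 + y2) / 2 - (32 + 0.23 * 64)) < 1e-9

# Envelope side of a 20 x 20 box scaled by 0.89 and rotated by 63 degrees.
side = 20 * 0.89 * (math.cos(math.radians(63)) + math.sin(math.radians(63)))
assert abs((x2 - x1) - side) < 1e-9
assert abs((y2 - y1) - side) < 1e-9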

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 9 additions & 5 deletions
@@ -204,18 +204,22 @@ def affine_bounding_box(
         dtype=dtype,
         device=device,
     ).view(2, 3)
-    # bboxes to 4 points like:
-    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1), ...]
+    # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
+    # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
+    # Single point structure is similar to
+    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
     points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
     points = torch.cat([points, torch.ones(points.shape[0], 1)], dim=-1)
-    transformed_points = points @ affine_matrix.T
-    # reshape transformed points to [N boxes, 4 points, x/y coords]
+    # 2) Now let's transform the points using affine matrix
+    transformed_points = torch.matmul(points, affine_matrix.T)
+    # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
+    # and compute bounding box from 4 transformed points:
     transformed_points = transformed_points.view(-1, 4, 2)
-    # compute bounding box from 4 transformed points:
     out_bbox_mins, _ = torch.min(transformed_points, dim=1)
     out_bbox_maxs, _ = torch.max(transformed_points, dim=1)
     out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1)
     # out_bboxes should be of shape [N boxes, 4]
+
     return convert_bounding_box_format(out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format).view(
         original_shape
     )
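The renumbered comments now describe the full pipeline: 1) expand each xyxy box into its four corners and lift them to homogeneous coordinates, 2) map every corner through the 2 x 3 affine matrix in a single matmul, 3) regroup corners per box and take the axis-aligned min/max envelope. A self-contained sketch of the same corner-point approach, substituting a plain rotation about the origin for torchvision's internal matrix construction (the helper rotate_xyxy_boxes is illustrative, not library API):

import math

import torch

def rotate_xyxy_boxes(boxes: torch.Tensor, angle_deg: float) -> torch.Tensor:
    # boxes: (N, 4) in xyxy format; rotating about the origin keeps the sketch short.
    theta = math.radians(angle_deg)
    affine_matrix = torch.tensor(
        [
            [math.cos(theta), -math.sin(theta), 0.0],
            [math.sin(theta), math.cos(theta), 0.0],
        ],
        dtype=boxes.dtype,
    )  # shape (2, 3), the same layout used by affine_bounding_box

    # 1) Four corners per box -> (N * 4, 2), then homogeneous coords -> (N * 4, 3).
    points = boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat([points, torch.ones(points.shape[0], 1, dtype=boxes.dtype)], dim=-1)
    # 2) Transform all corners with one matmul.
    transformed_points = torch.matmul(points, affine_matrix.T)
    # 3) Regroup per box and take the min/max envelope of the 4 corners.
    transformed_points = transformed_points.view(-1, 4, 2)
    out_mins, _ = torch.min(transformed_points, dim=1)
    out_maxs, _ = torch.max(transformed_points, dim=1)
    return torch.cat([out_mins, out_maxs], dim=1)

# A 90-degree rotation maps (10, 20, 30, 40) onto roughly (-40, 10, -20, 30).
print(rotate_xyxy_boxes(torch.tensor([[10.0, 20.0, 30.0, 40.0]]), 90.0))

Note that the result is the axis-aligned envelope of the rotated rectangle, so it is generally larger than the input box; that is exactly what the min/max step in affine_bounding_box computes.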

0 commit comments