
Commit a24fca7

Updated comments and added another test case
1 parent 234f113 commit a24fca7

File tree

2 files changed (+60 lines, -6 lines)

test/test_prototype_transforms_functional.py

Lines changed: 51 additions & 1 deletion
@@ -317,7 +317,7 @@ def _compute_expected_bbox(bbox, angle_, translate_, scale_, shear_, center_):
                 [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
             ]
         )
-        transformed_points = points @ true_matrix.T
+        transformed_points = np.matmul(points, true_matrix.T)
         out_bbox = [
             np.min(transformed_points[:, 0]),
             np.min(transformed_points[:, 1]),
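The change above is a spelling swap only: for 2-D arrays, points @ true_matrix.T and np.matmul(points, true_matrix.T) compute the same matrix product. A minimal standalone sketch (the rotation matrix and box coordinates here are made up for illustration):

import numpy as np

# Hypothetical 2x3 affine matrix: a 90-degree rotation about the origin.
theta = np.deg2rad(90.0)
true_matrix = np.array(
    [
        [np.cos(theta), -np.sin(theta), 0.0],
        [np.sin(theta), np.cos(theta), 0.0],
    ]
)

# Corners of the box (10, 20, 30, 40) in homogeneous (x, y, 1) form.
points = np.array(
    [
        [10.0, 20.0, 1.0],
        [30.0, 20.0, 1.0],
        [30.0, 40.0, 1.0],
        [10.0, 40.0, 1.0],
    ]
)

# The operator and the function call agree for 2-D inputs.
assert np.allclose(points @ true_matrix.T, np.matmul(points, true_matrix.T))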
@@ -371,3 +371,53 @@ def _compute_expected_bbox(bbox, angle_, translate_, scale_, shear_, center_):
         expected_bboxes = expected_bboxes.squeeze(0)

     torch.testing.assert_close(output_bboxes, expected_bboxes)
+
+
+def test_correctness_affine_bounding_box_on_fixed_input():
+    # Check transformation against known expected output
+    image_size = (64, 64)
+    # xyxy format
+    in_boxes = [
+        [20, 25, 35, 45],
+        [50, 5, 70, 22],
+        [image_size[1] // 2 - 10, image_size[0] // 2 - 10, image_size[1] // 2 + 10, image_size[0] // 2 + 10],
+        [1, 1, 5, 5],
+    ]
+    in_boxes = features.BoundingBox(
+        in_boxes, format=features.BoundingBoxFormat.XYXY, image_size=image_size, dtype=torch.float64
+    )
+    # Tested parameters
+    angle = 63
+    scale = 0.89
+    dx = 0.12
+    dy = 0.23
+
+    # Expected bboxes computed using albumentations:
+    # from albumentations.augmentations.geometric.functional import bbox_shift_scale_rotate
+    # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
+    # expected_bboxes = []
+    # for in_box in in_boxes:
+    #     n_in_box = normalize_bbox(in_box, *image_size)
+    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *image_size)
+    #     out_box = denormalize_bbox(n_out_box, *image_size)
+    #     expected_bboxes.append(out_box)
+    expected_bboxes = [
+        (24.522435977922218, 34.375689508290854, 46.443125279998114, 54.3516575015695),
+        (54.88288587110401, 50.08453280875634, 76.44484547743795, 72.81332520036864),
+        (27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844),
+        (48.56528888843238, 9.611532109828834, 53.35347829361575, 14.39972151501221),
+    ]
+
+    output_boxes = F.affine_bounding_box(
+        in_boxes,
+        in_boxes.format,
+        in_boxes.image_size,
+        angle,
+        (dx * image_size[1], dy * image_size[0]),
+        scale,
+        shear=(0, 0),
+    )
+
+    assert len(output_boxes) == len(expected_bboxes)
+    for a_out_box, out_box in zip(expected_bboxes, output_boxes):
+        np.testing.assert_allclose(out_box.cpu().numpy(), a_out_box)
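The expected_bboxes values were generated offline with the albumentations recipe kept in the comments (the -angle presumably bridges the two libraries' opposite angle conventions). They can also be sanity-checked by hand: the third input box sits exactly at the centre of the 64 x 64 image, so the transform about the image centre plus the (dx, dy) shift should move its centre by exactly (dx * width, dy * height), and its 20 x 20 extent, scaled by 0.89 and rotated by 63 degrees, should have an axis-aligned envelope of side 20 * 0.89 * (cos 63° + sin 63°). A back-of-envelope check, with the numbers copied from the diff above:

import math

# Third expected box (the one that starts centred at (32, 32)).
x1, y1, x2, y2 = 27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844

# Centre moved by exactly (dx * width, dy * height) = (7.68, 14.72).
assert abs((x1 + x2) / 2 - (32 + 0.12 * 64)) < 1e-9
assert abs((y1 + y2) / 2 - (32 + 0.23 * 64)) < 1e-9

# Envelope side of a 20 x 20 box scaled by 0.89 and rotated by 63 degrees.
side = 20 * 0.89 * (math.cos(math.radians(63)) + math.sin(math.radians(63)))
assert abs((x2 - x1) - side) < 1e-9
assert abs((y2 - y1) - side) < 1e-9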

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 9 additions & 5 deletions
@@ -204,18 +204,22 @@ def affine_bounding_box(
         dtype=dtype,
         device=device,
     ).view(2, 3)
-    # bboxes to 4 points like:
-    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1), ...]
+    # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
+    # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
+    # Single point structure is similar to
+    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
     points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
     points = torch.cat([points, torch.ones(points.shape[0], 1)], dim=-1)
-    transformed_points = points @ affine_matrix.T
-    # reshape transformed points to [N boxes, 4 points, x/y coords]
+    # 2) Now let's transform the points using affine matrix
+    transformed_points = torch.matmul(points, affine_matrix.T)
+    # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
+    # and compute bounding box from 4 transformed points:
     transformed_points = transformed_points.view(-1, 4, 2)
-    # compute bounding box from 4 transformed points:
     out_bbox_mins, _ = torch.min(transformed_points, dim=1)
     out_bbox_maxs, _ = torch.max(transformed_points, dim=1)
     out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1)
     # out_bboxes should be of shape [N boxes, 4]
+
     return convert_bounding_box_format(out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format).view(
         original_shape
     )
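The renumbered comments now describe the full pipeline: 1) expand each xyxy box into its four corners and lift them to homogeneous coordinates, 2) map every corner through the 2 x 3 affine matrix in a single matmul, 3) regroup corners per box and take the axis-aligned min/max envelope. A self-contained sketch of the same corner-point approach, substituting a plain rotation about the origin for torchvision's internal matrix construction (the helper rotate_xyxy_boxes is illustrative, not library API):

import math

import torch

def rotate_xyxy_boxes(boxes: torch.Tensor, angle_deg: float) -> torch.Tensor:
    # boxes: (N, 4) in xyxy format; rotating about the origin keeps the sketch short.
    theta = math.radians(angle_deg)
    affine_matrix = torch.tensor(
        [
            [math.cos(theta), -math.sin(theta), 0.0],
            [math.sin(theta), math.cos(theta), 0.0],
        ],
        dtype=boxes.dtype,
    )  # shape (2, 3), the same layout used by affine_bounding_box

    # 1) Four corners per box -> (N * 4, 2), then homogeneous coords -> (N * 4, 3).
    points = boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat([points, torch.ones(points.shape[0], 1, dtype=boxes.dtype)], dim=-1)
    # 2) Transform all corners with one matmul.
    transformed_points = torch.matmul(points, affine_matrix.T)
    # 3) Regroup per box and take the min/max envelope of the 4 corners.
    transformed_points = transformed_points.view(-1, 4, 2)
    out_mins, _ = torch.min(transformed_points, dim=1)
    out_maxs, _ = torch.max(transformed_points, dim=1)
    return torch.cat([out_mins, out_maxs], dim=1)

# A 90-degree rotation maps (10, 20, 30, 40) onto roughly (-40, 10, -20, 30).
print(rotate_xyxy_boxes(torch.tensor([[10.0, 20.0, 30.0, 40.0]]), 90.0))

Note that the result is the axis-aligned envelope of the rotated rectangle, so it is generally larger than the input box; that is exactly what the min/max step in affine_bounding_box computes.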

0 commit comments