from torch.nn.functional import one_hot
from torchvision.prototype import features
from torchvision.prototype.transforms.functional._meta import convert_bounding_box_format
+from torchvision.transforms.functional import _get_perspective_coeffs
from torchvision.transforms.functional_tensor import _max_value as get_max_value

+

make_tensor = functools.partial(torch.testing.make_tensor, device="cpu")
@@ -380,6 +382,37 @@ def pad_segmentation_mask():
        yield SampleInput(mask, padding=padding, padding_mode=padding_mode)


+@register_kernel_info_from_sample_inputs_fn
+def perspective_bounding_box():
+    for bounding_box, perspective_coeffs in itertools.product(
+        make_bounding_boxes(),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            bounding_box,
+            format=bounding_box.format,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
+@register_kernel_info_from_sample_inputs_fn
+def perspective_segmentation_mask():
+    for mask, perspective_coeffs in itertools.product(
+        make_segmentation_masks(extra_dims=((), (4,))),
+        [
+            [1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018],
+            [0.7366, -0.11724, 1.45775, -0.15012, 0.73406, 2.6019, -0.0072, -0.0063],
+        ],
+    ):
+        yield SampleInput(
+            mask,
+            perspective_coeffs=perspective_coeffs,
+        )
+
+
 @pytest.mark.parametrize(
     "kernel",
     [
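The two hard-coded lists in the sample inputs above are the eight perspective coefficients (a, b, c, d, e, f, g, h). As a minimal sketch of the projective mapping they encode, assuming the convention documented for torchvision's perspective helpers (`apply_perspective` is a hypothetical name used only for illustration):

```python
# Sketch, not part of the diff: move one point (x, y) under coefficients
# (a, b, c, d, e, f, g, h) using the (a*x + b*y + c) / (g*x + h*y + 1) convention.
def apply_perspective(coeffs, x, y):
    a, b, c, d, e, f, g, h = coeffs
    denom = g * x + h * y + 1.0  # shared projective denominator
    return (a * x + b * y + c) / denom, (d * x + e * y + f) / denom


# Example with the first coefficient list from the sample inputs:
print(apply_perspective([1.2405, 0.1772, -6.9113, 0.0463, 1.251, -5.235, 0.00013, 0.0018], 10.0, 10.0))
```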
@@ -985,7 +1018,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device):
    ],
)
def test_correctness_resized_crop_bounding_box(device, format, top, left, height, width, size):
-    def _compute_expected(bbox, top_, left_, height_, width_, size_):
+    def _compute_expected_bbox(bbox, top_, left_, height_, width_, size_):
        # bbox should be xyxy
        bbox[0] = (bbox[0] - left_) * size_[1] / width_
        bbox[1] = (bbox[1] - top_) * size_[0] / height_
@@ -1001,7 +1034,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
    ]
    expected_bboxes = []
    for in_box in in_boxes:
-        expected_bboxes.append(_compute_expected(list(in_box), top, left, height, width, size))
+        expected_bboxes.append(_compute_expected_bbox(list(in_box), top, left, height, width, size))
    expected_bboxes = torch.tensor(expected_bboxes, device=device)

    in_boxes = features.BoundingBox(
@@ -1027,7 +1060,7 @@ def _compute_expected(bbox, top_, left_, height_, width_, size_):
    ],
)
def test_correctness_resized_crop_segmentation_mask(device, top, left, height, width, size):
-    def _compute_expected(mask, top_, left_, height_, width_, size_):
+    def _compute_expected_mask(mask, top_, left_, height_, width_, size_):
        output = mask.clone()
        output = output[:, top_ : top_ + height_, left_ : left_ + width_]
        output = torch.nn.functional.interpolate(output[None, :].float(), size=size_, mode="nearest")
@@ -1038,7 +1071,7 @@ def _compute_expected(mask, top_, left_, height_, width_, size_):
    in_mask[0, 10:20, 10:20] = 1
    in_mask[0, 5:15, 12:23] = 2

-    expected_mask = _compute_expected(in_mask, top, left, height, width, size)
+    expected_mask = _compute_expected_mask(in_mask, top, left, height, width, size)
    output_mask = F.resized_crop_segmentation_mask(in_mask, top, left, height, width, size)
    torch.testing.assert_close(output_mask, expected_mask)
@@ -1085,3 +1118,158 @@ def parse_padding():

    expected_mask = _compute_expected_mask()
    torch.testing.assert_close(out_mask, expected_mask)
+
+
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_bounding_box(device, startpoints, endpoints):
+    def _compute_expected_bbox(bbox, pcoeffs_):
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
+        bbox_xyxy = convert_bounding_box_format(
+            bbox, old_format=bbox.format, new_format=features.BoundingBoxFormat.XYXY
+        )
+        points = np.array(
+            [
+                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        numer = np.matmul(points, m1.T)
+        denom = np.matmul(points, m2.T)
+        transformed_points = numer / denom
+        out_bbox = [
+            np.min(transformed_points[:, 0]),
+            np.min(transformed_points[:, 1]),
+            np.max(transformed_points[:, 0]),
+            np.max(transformed_points[:, 1]),
+        ]
+        out_bbox = features.BoundingBox(
+            out_bbox,
+            format=features.BoundingBoxFormat.XYXY,
+            image_size=bbox.image_size,
+            dtype=torch.float32,
+            device=bbox.device,
+        )
+        return convert_bounding_box_format(
+            out_bbox, old_format=features.BoundingBoxFormat.XYXY, new_format=bbox.format, copy=False
+        )
+
+    image_size = (32, 38)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+    inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints)
+
+    for bboxes in make_bounding_boxes(
+        image_sizes=[
+            image_size,
+        ],
+        extra_dims=((4,),),
+    ):
+        bboxes = bboxes.to(device)
+        bboxes_format = bboxes.format
+        bboxes_image_size = bboxes.image_size
+
+        output_bboxes = F.perspective_bounding_box(
+            bboxes,
+            bboxes_format,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if bboxes.ndim < 2:
+            bboxes = [bboxes]
+
+        expected_bboxes = []
+        for bbox in bboxes:
+            bbox = features.BoundingBox(bbox, format=bboxes_format, image_size=bboxes_image_size)
+            expected_bboxes.append(_compute_expected_bbox(bbox, inv_pcoeffs))
+        if len(expected_bboxes) > 1:
+            expected_bboxes = torch.stack(expected_bboxes)
+        else:
+            expected_bboxes = expected_bboxes[0]
+        torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=1e-5, atol=1e-5)
+
+
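Two details in the test above are worth spelling out. The reference takes the min/max of the four transformed corners because a projective transform maps an axis-aligned box to a general quadrilateral, and the kernel is expected to return its axis-aligned envelope. And the kernel receives `pcoeffs` while the reference receives `inv_pcoeffs`: `_get_perspective_coeffs(startpoints, endpoints)` solves for the destination-to-source sampling map used when warping images, so pushing box corners forward requires the coefficients fitted in the opposite direction. A hedged usage sketch of the kernel call this test exercises (the box values are invented for illustration):

```python
# Illustration only: run the kernel under test on a single XYXY box.
bbox = features.BoundingBox(
    [10.0, 10.0, 20.0, 20.0],
    format=features.BoundingBoxFormat.XYXY,
    image_size=(32, 38),
)
coeffs = _get_perspective_coeffs(
    startpoints=[[0, 0], [33, 0], [33, 25], [0, 25]],
    endpoints=[[3, 2], [32, 3], [30, 24], [2, 25]],
)
out_bbox = F.perspective_bounding_box(bbox, bbox.format, perspective_coeffs=coeffs)
```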
+@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize(
+    "startpoints, endpoints",
+    [
+        [[[0, 0], [33, 0], [33, 25], [0, 25]], [[3, 2], [32, 3], [30, 24], [2, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[0, 0], [33, 0], [33, 25], [0, 25]]],
+        [[[3, 2], [32, 3], [30, 24], [2, 25]], [[5, 5], [30, 3], [33, 19], [4, 25]]],
+    ],
+)
+def test_correctness_perspective_segmentation_mask(device, startpoints, endpoints):
+    def _compute_expected_mask(mask, pcoeffs_):
+        assert mask.ndim == 3 and mask.shape[0] == 1
+        m1 = np.array(
+            [
+                [pcoeffs_[0], pcoeffs_[1], pcoeffs_[2]],
+                [pcoeffs_[3], pcoeffs_[4], pcoeffs_[5]],
+            ]
+        )
+        m2 = np.array(
+            [
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+                [pcoeffs_[6], pcoeffs_[7], 1.0],
+            ]
+        )
+
+        expected_mask = torch.zeros_like(mask.cpu())
+        for out_y in range(expected_mask.shape[1]):
+            for out_x in range(expected_mask.shape[2]):
+                output_pt = np.array([out_x + 0.5, out_y + 0.5, 1.0])
+
+                numer = np.matmul(output_pt, m1.T)
+                denom = np.matmul(output_pt, m2.T)
+                input_pt = np.floor(numer / denom).astype(np.int32)
+
+                in_x, in_y = input_pt[:2]
+                if 0 <= in_x < mask.shape[2] and 0 <= in_y < mask.shape[1]:
+                    expected_mask[0, out_y, out_x] = mask[0, in_y, in_x]
+        return expected_mask.to(mask.device)
+
+    pcoeffs = _get_perspective_coeffs(startpoints, endpoints)
+
+    for mask in make_segmentation_masks(extra_dims=((), (4,))):
+        mask = mask.to(device)
+
+        output_mask = F.perspective_segmentation_mask(
+            mask,
+            perspective_coeffs=pcoeffs,
+        )
+
+        if mask.ndim < 4:
+            masks = [mask]
+        else:
+            masks = [m for m in mask]
+
+        expected_masks = []
+        for mask in masks:
+            expected_mask = _compute_expected_mask(mask, pcoeffs)
+            expected_masks.append(expected_mask)
+        if len(expected_masks) > 1:
+            expected_masks = torch.stack(expected_masks)
+        else:
+            expected_masks = expected_masks[0]
+        torch.testing.assert_close(output_mask, expected_masks)
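Unlike the box test, the mask reference above uses `pcoeffs` directly, since it maps each output pixel center back to the source, which is exactly the sampling direction the coefficients encode. The per-pixel Python loop is slow for large masks; a vectorized numpy equivalent, as a sketch under the same assumptions (backward mapping of pixel centers, floor rounding, zero fill outside the source; `expected_mask_vectorized` is our name, not part of the test suite):

```python
import numpy as np


def expected_mask_vectorized(mask_np, pcoeffs):
    # mask_np: (H, W) array; pcoeffs: list of the eight perspective coefficients.
    h, w = mask_np.shape
    ys, xs = np.mgrid[0:h, 0:w]
    # Homogeneous output pixel centers, shape (H, W, 3).
    pts = np.stack([xs + 0.5, ys + 0.5, np.ones((h, w))], axis=-1)
    m1 = np.array([pcoeffs[0:3], pcoeffs[3:6]])  # numerator rows, (2, 3)
    m2 = np.array([pcoeffs[6:8] + [1.0]])        # shared denominator row, (1, 3)
    src = np.floor((pts @ m1.T) / (pts @ m2.T)).astype(np.int64)  # (H, W, 2) as (x, y)
    in_x, in_y = src[..., 0], src[..., 1]
    valid = (in_x >= 0) & (in_x < w) & (in_y >= 0) & (in_y < h)
    out = np.zeros_like(mask_np)
    out[valid] = mask_np[in_y[valid], in_x[valid]]
    return out
```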