@@ -279,9 +279,9 @@ def affine_image_tensor(
     center_f = [0.0, 0.0]
     if center is not None:
         # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
-        center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
+        center_f = [(c - s * 0.5) for c, s in zip(center, [width, height])]
 
-    translate_f = [1.0 * t for t in translate]
+    translate_f = [float(t) for t in translate]
     matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale, shear)
 
     output = _FT.affine(image, matrix, interpolation=interpolation.value, fill=fill)
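The comprehension above shifts a user-supplied center, given in pixel coordinates, so that (0, 0) lands on the image center, which is the convention `_get_inverse_affine_matrix` expects. A minimal sketch of the same shift, with hypothetical `width`, `height`, and `center` values:

    # Hypothetical 640x480 image whose supplied center is the exact image center.
    width, height = 640, 480
    center = [320.0, 240.0]
    # Same shift as in the hunk above: (0, 0) now means the image center.
    center_f = [(c - s * 0.5) for c, s in zip(center, [width, height])]
    assert center_f == [0.0, 0.0]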
@@ -321,7 +321,7 @@ def _affine_bounding_box_xyxy(
     shear: List[float],
     center: Optional[List[float]] = None,
     expand: bool = False,
-) -> torch.Tensor:
+) -> Tuple[torch.Tensor, Tuple[int, int]]:
     angle, translate, shear, center = _affine_parse_args(
         angle, translate, scale, shear, InterpolationMode.NEAREST, center
     )
@@ -333,19 +333,24 @@ def _affine_bounding_box_xyxy(
     dtype = bounding_box.dtype if torch.is_floating_point(bounding_box) else torch.float32
     device = bounding_box.device
 
-    affine_matrix = torch.tensor(
-        _get_inverse_affine_matrix(center, angle, translate, scale, shear, inverted=False),
-        dtype=dtype,
-        device=device,
-    ).view(2, 3)
+    affine_vector = _get_inverse_affine_matrix(center, angle, translate, scale, shear, inverted=False)
+    transposed_affine_matrix = (
+        torch.tensor(
+            affine_vector,
+            dtype=dtype,
+            device=device,
+        )
+        .view(2, 3)
+        .T
+    )
     # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
     # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
     # Single point structure is similar to
     # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
     points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
     points = torch.cat([points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1)
     # 2) Now let's transform the points using affine matrix
-    transformed_points = torch.matmul(points, affine_matrix.T)
+    transformed_points = torch.matmul(points, transposed_affine_matrix)
     # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
     # and compute bounding box from 4 transformed points:
     transformed_points = transformed_points.view(-1, 4, 2)
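Precomputing `transposed_affine_matrix` keeps the hot path to a single `matmul`; the earlier code transposed `affine_matrix` on every use. The corner trick itself is unchanged: each XYXY box becomes four homogeneous (x, y, 1) points that are mapped through the 2x3 affine matrix. A self-contained sketch of that mapping, using a hypothetical translate-by-(10, 20) matrix rather than the output of `_get_inverse_affine_matrix`:

    import torch

    bounding_box = torch.tensor([[0.0, 0.0, 4.0, 2.0]])  # one XYXY box
    # Hypothetical affine: identity rotation plus a (+10, +20) translation.
    transposed_affine_matrix = torch.tensor(
        [[1.0, 0.0, 10.0], [0.0, 1.0, 20.0]]
    ).T  # shape (3, 2), so (N * 4, 3) @ (3, 2) -> (N * 4, 2)

    # Same corner expansion as in the hunk above.
    points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat([points, torch.ones(points.shape[0], 1)], dim=-1)
    transformed_points = torch.matmul(points, transposed_affine_matrix)
    print(transformed_points.view(-1, 4, 2))
    # tensor([[[10., 20.], [14., 20.], [14., 22.], [10., 22.]]])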
@@ -360,20 +365,24 @@ def _affine_bounding_box_xyxy(
         points = torch.tensor(
             [
                 [0.0, 0.0, 1.0],
-                [0.0, 1.0 * height, 1.0],
-                [1.0 * width, 1.0 * height, 1.0],
-                [1.0 * width, 0.0, 1.0],
+                [0.0, float(height), 1.0],
+                [float(width), float(height), 1.0],
+                [float(width), 0.0, 1.0],
             ],
             dtype=dtype,
             device=device,
         )
-        new_points = torch.matmul(points, affine_matrix.T)
+        new_points = torch.matmul(points, transposed_affine_matrix)
         tr, _ = torch.min(new_points, dim=0, keepdim=True)
         # Translate bounding boxes
         out_bboxes[:, 0::2] = out_bboxes[:, 0::2] - tr[:, 0]
         out_bboxes[:, 1::2] = out_bboxes[:, 1::2] - tr[:, 1]
+        # Estimate meta-data for image with inverted=True and with center=[0,0]
+        affine_vector = _get_inverse_affine_matrix([0.0, 0.0], angle, translate, scale, shear)
+        new_width, new_height = _FT._compute_affine_output_size(affine_vector, width, height)
+        image_size = (new_height, new_width)
 
-    return out_bboxes.to(bounding_box.dtype)
+    return out_bboxes.to(bounding_box.dtype), image_size
 
 
 def affine_bounding_box(
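With `expand=True` the helper now does two extra things: it shifts the transformed boxes so the expanded canvas starts at (0, 0), and it reports that canvas size to the caller instead of leaving every caller to recompute it. `_FT._compute_affine_output_size` is torchvision-internal; the sketch below approximates the same corner-spread estimate with plain torch for a hypothetical 100x100 image rotated by 45 degrees (exact rounding may differ from the internal helper):

    import math
    import torch

    width = height = 100
    theta = math.radians(45.0)
    cos, sin = math.cos(theta), math.sin(theta)

    # Rotate the four image corners about the image center and measure the
    # spread of the result; that spread is the expanded canvas size.
    corners = torch.tensor(
        [[-width / 2, -height / 2], [width / 2, -height / 2],
         [width / 2, height / 2], [-width / 2, height / 2]]
    )
    rot = torch.tensor([[cos, -sin], [sin, cos]])
    new_corners = corners @ rot.T
    new_width = int((new_corners[:, 0].max() - new_corners[:, 0].min()).ceil())
    new_height = int((new_corners[:, 1].max() - new_corners[:, 1].min()).ceil())
    print(new_width, new_height)  # 142 142, roughly 100 * sqrt(2)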
@@ -391,7 +400,7 @@ def affine_bounding_box(
         bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
     ).view(-1, 4)
 
-    out_bboxes = _affine_bounding_box_xyxy(bounding_box, image_size, angle, translate, scale, shear, center)
+    out_bboxes, _ = _affine_bounding_box_xyxy(bounding_box, image_size, angle, translate, scale, shear, center)
 
     # out_bboxes should be of shape [N boxes, 4]
 
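`affine_bounding_box` never expands the canvas, so the image size the helper reports is simply discarded with `_`; returning it unconditionally keeps `_affine_bounding_box_xyxy` with a single, uniform signature for both the affine and rotate callers.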
@@ -502,7 +511,7 @@ def rotate_image_tensor(
             warnings.warn("The provided center argument has no effect on the result if expand is True")
         else:
             # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
-            center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
+            center_f = [(c - s * 0.5) for c, s in zip(center, [width, height])]
 
     # due to current incoherence of rotation angle direction between affine and rotate implementations
     # we need to set -angle.
@@ -558,7 +567,7 @@ def rotate_bounding_box(
         bounding_box, old_format=format, new_format=features.BoundingBoxFormat.XYXY
     ).view(-1, 4)
 
-    out_bboxes = _affine_bounding_box_xyxy(
+    out_bboxes, image_size = _affine_bounding_box_xyxy(
         bounding_box,
         image_size,
         angle=-angle,
@@ -569,14 +578,6 @@ def rotate_bounding_box(
         expand=expand,
     )
 
-    if expand:
-        # TODO: Move this computation inside of `_affine_bounding_box_xyxy` to avoid computing the rotation and points
-        # matrix twice
-        height, width = image_size
-        rotation_matrix = _get_inverse_affine_matrix([0.0, 0.0], angle, [0.0, 0.0], 1.0, [0.0, 0.0])
-        new_width, new_height = _FT._compute_affine_output_size(rotation_matrix, width, height)
-        image_size = (new_height, new_width)
-
     return (
         convert_format_bounding_box(
             out_bboxes, old_format=features.BoundingBoxFormat.XYXY, new_format=format, copy=False
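After this change, `rotate_bounding_box` with `expand=True` receives the expanded canvas size straight from `_affine_bounding_box_xyxy`, so the removed block above, which rebuilt the rotation matrix just to size the canvas, is no longer needed. A hedged usage sketch; the prototype API moves quickly, so the exact import path and call signature here are assumptions based on this diff:

    import torch
    from torchvision.prototype import features
    from torchvision.prototype.transforms import functional as F

    # One hypothetical XYXY box in an (H=100, W=200) image.
    box = torch.tensor([[10.0, 10.0, 50.0, 40.0]])
    out_box, new_size = F.rotate_bounding_box(
        box,
        format=features.BoundingBoxFormat.XYXY,
        image_size=(100, 200),
        angle=90.0,
        expand=True,
    )
    print(new_size)  # (200, 100): a 90-degree expand swaps height and width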