Make crop work the same for pil and tensor (#3770)

jaesuny · datumbox · web-flow · commit 6397190105cf · 2021-05-17T15:46:47.000+01:00
* Make crop work the same for pil and tensor

* Only call pad if needed in functional_tensor.crop

* Fix top-left functional_tensor.crop

* Update documentation for functional.crop

* Add other test cases of functional.crop

* Fix bug

* Fixing formatter

* Fix stylings

Co-authored-by: Vasilis Vryniotis <datumbox@users.noreply.github.com>
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
@@ -188,6 +188,22 @@ def test_crop(self):
             'crop', 'RandomCrop', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
 
+        # Test transforms.functional.crop including outside the image area
+        fn_kwargs = {"top": -2, "left": 3, "height": 4, "width": 5}  # top
+        self._test_functional_op('crop', fn_kwargs=fn_kwargs)
+
+        fn_kwargs = {"top": 1, "left": -3, "height": 4, "width": 5}  # left
+        self._test_functional_op('crop', fn_kwargs=fn_kwargs)
+
+        fn_kwargs = {"top": 7, "left": 3, "height": 4, "width": 5}  # bottom
+        self._test_functional_op('crop', fn_kwargs=fn_kwargs)
+
+        fn_kwargs = {"top": 3, "left": 8, "height": 4, "width": 5}  # right
+        self._test_functional_op('crop', fn_kwargs=fn_kwargs)
+
+        fn_kwargs = {"top": -3, "left": -3, "height": 15, "width": 15}  # all
+        self._test_functional_op('crop', fn_kwargs=fn_kwargs)
+
         sizes = [5, [5, ], [6, 6]]
         padding_configs = [
             {"padding_mode": "constant", "fill": 0},
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
@@ -463,7 +463,8 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con
 def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
     """Crop the given image at specified location and output size.
     If the image is torch Tensor, it is expected
-    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions.
+    If image size is smaller than output size along any edge, image is padded with 0 and then cropped.
 
     Args:
         img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
diff --git a/torchvision/transforms/functional_tensor.py b/torchvision/transforms/functional_tensor.py
@@ -122,7 +122,14 @@ def hflip(img: Tensor) -> Tensor:
 def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
     _assert_image_tensor(img)
 
-    return img[..., top:top + height, left:left + width]
+    w, h = _get_image_size(img)
+    right = left + width
+    bottom = top + height
+
+    if left < 0 or top < 0 or right > w or bottom > h:
+        padding_ltrb = [max(-left, 0), max(-top, 0), max(right - w, 0), max(bottom - h, 0)]
+        return pad(img[..., max(top, 0):bottom, max(left, 0):right], padding_ltrb, fill=0)
+    return img[..., top:bottom, left:right]
 
 
 def rgb_to_grayscale(img: Tensor, num_output_channels: int = 1) -> Tensor: