
Commit 01175db

Unified FiveCrop and F.five_crop

1 parent 69cb9c5

File tree: 3 files changed, +85 -19 lines

test/test_transforms_tensor.py

Lines changed: 39 additions & 0 deletions
@@ -118,6 +118,45 @@ def test_center_crop(self):
             "center_crop", "CenterCrop", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
 
+    def _test_geom_op_list_output(self, func, method, out_length, fn_kwargs=None, meth_kwargs=None):
+        if fn_kwargs is None:
+            fn_kwargs = {}
+        if meth_kwargs is None:
+            meth_kwargs = {}
+        tensor, pil_img = self._create_data(height=20, width=20)
+        transformed_t_list = getattr(F, func)(tensor, **fn_kwargs)
+        transformed_p_list = getattr(F, func)(pil_img, **fn_kwargs)
+        self.assertEqual(len(transformed_t_list), len(transformed_p_list))
+        self.assertEqual(len(transformed_t_list), out_length)
+        for transformed_tensor, transformed_pil_img in zip(transformed_t_list, transformed_p_list):
+            self.compareTensorToPIL(transformed_tensor, transformed_pil_img)
+
+        scripted_fn = torch.jit.script(getattr(F, func))
+        transformed_t_list_script = scripted_fn(tensor.detach().clone(), **fn_kwargs)
+        self.assertEqual(len(transformed_t_list), len(transformed_t_list_script))
+        self.assertEqual(len(transformed_t_list_script), out_length)
+        for transformed_tensor, transformed_tensor_script in zip(transformed_t_list, transformed_t_list_script):
+            self.assertTrue(transformed_tensor.equal(transformed_tensor_script),
+                            msg="{} vs {}".format(transformed_tensor, transformed_tensor_script))
+
+        # test for class interface
+        f = getattr(T, method)(**meth_kwargs)
+        scripted_fn = torch.jit.script(f)
+        output = scripted_fn(tensor)
+        self.assertEqual(len(output), len(transformed_t_list_script))
+
+    def test_five_crop(self):
+        fn_kwargs = {"size": (5,)}
+        meth_kwargs = {"size": (5, )}
+        self._test_geom_op_list_output(
+            "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+        fn_kwargs = {"size": (4, 5)}
+        meth_kwargs = {"size": (4, 5)}
+        self._test_geom_op_list_output(
+            "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+
 
 if __name__ == '__main__':
     unittest.main()
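
Since the test module ends in unittest.main(), the new case can be run directly from the shell; a sketch, assuming the command is issued from the repository root on Python 3.7+ (where unittest's CLI supports the -k filter):

    python test/test_transforms_tensor.py -k test_five_crop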

torchvision/transforms/functional.py

Lines changed: 21 additions & 13 deletions
@@ -10,7 +10,7 @@
 
 import torch
 from torch import Tensor
-from torch.jit.annotations import List
+from torch.jit.annotations import List, Tuple
 
 try:
     import accimage
@@ -423,13 +423,15 @@ def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
         img (PIL Image or Tensor): Image to be cropped.
         output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int
             it is used for both directions
+
     Returns:
         PIL Image or Tensor: Cropped image.
     """
     if isinstance(output_size, numbers.Number):
         output_size = (int(output_size), int(output_size))
     elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
         output_size = (output_size[0], output_size[0])
+
     image_width, image_height = _get_image_size(img)
     crop_height, crop_width = output_size
 
@@ -589,8 +591,10 @@ def vflip(img: Tensor) -> Tensor:
     return F_t.vflip(img)
 
 
-def five_crop(img, size):
-    """Crop the given PIL Image into four corners and the central crop.
+def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
+    """Crop the given image into four corners and the central crop.
+    The image can be a PIL Image or a torch Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
 
     .. Note::
         This transform returns a tuple of images and there may be a
@@ -607,22 +611,26 @@ def five_crop(img, size):
     """
     if isinstance(size, numbers.Number):
         size = (int(size), int(size))
-    else:
-        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+    elif isinstance(size, (tuple, list)) and len(size) == 1:
+        size = (size[0], size[0])
+
+    if len(size) != 2:
+        raise ValueError("Please provide only two dimensions (h, w) for size.")
 
-    image_width, image_height = img.size
+    image_width, image_height = _get_image_size(img)
     crop_height, crop_width = size
     if crop_width > image_width or crop_height > image_height:
         msg = "Requested crop size {} is bigger than input size {}"
         raise ValueError(msg.format(size, (image_height, image_width)))
 
-    tl = img.crop((0, 0, crop_width, crop_height))
-    tr = img.crop((image_width - crop_width, 0, image_width, crop_height))
-    bl = img.crop((0, image_height - crop_height, crop_width, image_height))
-    br = img.crop((image_width - crop_width, image_height - crop_height,
-                   image_width, image_height))
-    center = center_crop(img, (crop_height, crop_width))
-    return (tl, tr, bl, br, center)
+    tl = crop(img, 0, 0, crop_height, crop_width)
+    tr = crop(img, 0, image_width - crop_width, crop_height, crop_width)
+    bl = crop(img, image_height - crop_height, 0, crop_height, crop_width)
+    br = crop(img, image_height - crop_height, image_width - crop_width, crop_height, crop_width)
+
+    center = center_crop(img, [crop_height, crop_width])
+
+    return tl, tr, bl, br, center
 
 
 def ten_crop(img, size, vertical_flip=False):
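
With the tensor-aware crop()/center_crop() helpers replacing the PIL-only img.crop(...) calls, and the List/Tuple annotations added for TorchScript, F.five_crop now handles both PIL Images and tensors. A minimal usage sketch, not part of the commit (the tensor shape and crop size here are arbitrary):

    import torch
    from torchvision.transforms import functional as F

    img = torch.randint(0, 256, (3, 20, 20), dtype=torch.uint8)  # [..., H, W] tensor
    # size is (h, w); passing a list keeps the call TorchScript-friendly, as in the new tests
    tl, tr, bl, br, center = F.five_crop(img, [4, 5])
    assert center.shape[-2:] == (4, 5)

    # The free function itself can now be scripted, which is what the new test exercises.
    scripted_five_crop = torch.jit.script(F.five_crop)
    crops = scripted_five_crop(img, [4, 5])

As in the tests, a one-element size such as (5,) is expanded to a square (5, 5) crop by both the function and the transform.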

torchvision/transforms/transforms.py

Lines changed: 25 additions & 6 deletions
@@ -260,7 +260,7 @@ class CenterCrop(torch.nn.Module):
     Args:
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
-            made. For scripted operation please use a list: (size, ) or (size_x, size_y)
+            made. For scripted operation, please use a list: (size, ) or (size_x, size_y)
     """
 
     def __init__(self, size):
@@ -270,6 +270,9 @@ def __init__(self, size):
         elif isinstance(size, (tuple, list)) and len(size) == 1:
             self.size = (size[0], size[0])
         else:
+            if len(size) != 2:
+                raise ValueError("Please provide only two dimensions (h, w) for size.")
+
             self.size = size
 
     def forward(self, img):
@@ -572,7 +575,7 @@ def __repr__(self):
 
 
 class RandomVerticalFlip(torch.nn.Module):
-    """Vertically flip the given PIL Image randomly with a given probability.
+    """Vertically flip the given image randomly with a given probability.
     The image can be a PIL Image or a torch Tensor, in which case it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading
     dimensions
@@ -769,8 +772,11 @@ def __init__(self, *args, **kwargs):
         super(RandomSizedCrop, self).__init__(*args, **kwargs)
 
 
-class FiveCrop(object):
-    """Crop the given PIL Image into four corners and the central crop
+class FiveCrop(torch.nn.Module):
+    """Crop the given image into four corners and the central crop.
+    The image can be a PIL Image or a torch Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading
+    dimensions
 
     .. Note::
         This transform returns a tuple of images and there may be a mismatch in the number of
@@ -780,6 +786,7 @@ class FiveCrop(object):
     Args:
         size (sequence or int): Desired output size of the crop. If size is an ``int``
             instead of sequence like (h, w), a square crop of size (size, size) is made.
+            For scripted operation, please use a list: (size, ) or (size_x, size_y)
 
     Example:
         >>> transform = Compose([
@@ -794,14 +801,26 @@ class FiveCrop(object):
     """
 
     def __init__(self, size):
+        super().__init__()
         self.size = size
         if isinstance(size, numbers.Number):
             self.size = (int(size), int(size))
+        elif isinstance(size, (tuple, list)) and len(size) == 1:
+            self.size = (size[0], size[0])
         else:
-            assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+            if len(size) != 2:
+                raise ValueError("Please provide only two dimensions (h, w) for size.")
+
             self.size = size
 
-    def __call__(self, img):
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be cropped.
+
+        Returns:
+            PIL Image or Tensor: Cropped image.
+        """
         return F.five_crop(img, self.size)
 
     def __repr__(self):
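
Because FiveCrop now subclasses torch.nn.Module and moves its logic from __call__ into forward(), the transform can be scripted like the other unified transforms. A small sketch, not part of the commit:

    import torch
    import torchvision.transforms as T

    five_crop = T.FiveCrop(size=(5,))        # one-element sequence expands to a (5, 5) square
    scripted = torch.jit.script(five_crop)   # possible now that FiveCrop is an nn.Module
    crops = scripted(torch.rand(3, 20, 20))  # tuple (tl, tr, bl, br, center), each 3 x 5 x 5
    assert len(crops) == 5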
