Commit ad7b2be

[WIP] Unified Tensor/PIL crop

1 parent cd2b7f0 commit ad7b2be

File tree: 5 files changed, +128 -75 lines changed

test/test_transforms_tensor.py

Lines changed: 21 additions & 10 deletions
@@ -18,26 +18,30 @@ def compareTensorToPIL(self, tensor, pil_image):
         pil_tensor = torch.as_tensor(np.array(pil_image).transpose((2, 0, 1)))
         self.assertTrue(tensor.equal(pil_tensor))

-    def _test_flip(self, func, method):
-        tensor, pil_img = self._create_data()
-        flip_tensor = getattr(F, func)(tensor)
-        flip_pil_img = getattr(F, func)(pil_img)
-        self.compareTensorToPIL(flip_tensor, flip_pil_img)
+    def _test_geom_op(self, func, method, fn_kwargs=None, meth_kwargs=None):
+        if fn_kwargs is None:
+            fn_kwargs = {}
+        if meth_kwargs is None:
+            meth_kwargs = {}
+        tensor, pil_img = self._create_data(height=10, width=10)
+        transformed_tensor = getattr(F, func)(tensor, **fn_kwargs)
+        transformed_pil_img = getattr(F, func)(pil_img, **fn_kwargs)
+        self.compareTensorToPIL(transformed_tensor, transformed_pil_img)

         scripted_fn = torch.jit.script(getattr(F, func))
-        flip_tensor_script = scripted_fn(tensor)
-        self.assertTrue(flip_tensor.equal(flip_tensor_script))
+        transformed_tensor_script = scripted_fn(tensor, **fn_kwargs)
+        self.assertTrue(transformed_tensor.equal(transformed_tensor_script))

         # test for class interface
-        f = getattr(T, method)()
+        f = getattr(T, method)(**meth_kwargs)
         scripted_fn = torch.jit.script(f)
         scripted_fn(tensor)

     def test_random_horizontal_flip(self):
-        self._test_flip('hflip', 'RandomHorizontalFlip')
+        self._test_geom_op('hflip', 'RandomHorizontalFlip')

     def test_random_vertical_flip(self):
-        self._test_flip('vflip', 'RandomVerticalFlip')
+        self._test_geom_op('vflip', 'RandomVerticalFlip')

     def test_adjustments(self):
         fns = ['adjust_brightness', 'adjust_contrast', 'adjust_saturation']
@@ -65,6 +69,13 @@ def test_adjustments(self):
         self.assertLess(max_diff, 5 / 255 + 1e-5)
         self.assertLess(max_diff_scripted, 5 / 255 + 1e-5)

+    def test_crop(self):
+        fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5}
+        meth_kwargs = {"size": (4, 5), "padding": 4, "pad_if_needed": True, }
+        self._test_geom_op(
+            'crop', 'RandomCrop', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+

 if __name__ == '__main__':
     unittest.main()
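
The renamed helper generalizes the old `_test_flip` so that any functional-op/transform-class pair can be checked for tensor/PIL agreement and scriptability. As a rough standalone sketch of what the new `test_crop` exercises (the image setup below is an assumption; the real test builds its data with `_create_data`):

import torch
import torchvision.transforms as T
import torchvision.transforms.functional as F

# Stand-in for _create_data(height=10, width=10): a 3x10x10 uint8 image tensor.
tensor = torch.randint(0, 256, (3, 10, 10), dtype=torch.uint8)

# Functional interface: crop a 4x5 box whose top-left corner sits at (top=2, left=3).
out = F.crop(tensor, top=2, left=3, height=4, width=5)
assert out.shape == (3, 4, 5)

# Class interface: the transform should also survive torch.jit.script.
scripted = torch.jit.script(T.RandomCrop(size=(4, 5), padding=4, pad_if_needed=True))
out = scripted(tensor)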

torchvision/transforms/functional.py

Lines changed: 30 additions & 23 deletions
@@ -16,18 +16,24 @@
 from . import functional_tensor as F_t


-def _is_pil_image(img):
-    if accimage is not None:
-        return isinstance(img, (Image.Image, accimage.Image))
-    else:
-        return isinstance(img, Image.Image)
+@torch.jit.export
+def _get_image_size(img):
+    # type: (Tensor) -> List[int]
+    if isinstance(img, torch.Tensor):
+        return F_t._get_image_size(img)
+
+    return F_pil._get_image_size(img)


+@torch.jit.ignore
 def _is_numpy(img):
+    # type: (Any) -> bool
     return isinstance(img, np.ndarray)


+@torch.jit.ignore
 def _is_numpy_image(img):
+    # type: (Any) -> bool
     return img.ndim in {2, 3}
@@ -42,7 +48,7 @@ def to_tensor(pic):
     Returns:
         Tensor: Converted image.
     """
-    if not(_is_pil_image(pic) or _is_numpy(pic)):
+    if not(F_pil._is_pil_image(pic) or _is_numpy(pic)):
         raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(type(pic)))

     if _is_numpy(pic) and not _is_numpy_image(pic):
@@ -97,7 +103,7 @@ def pil_to_tensor(pic):
     Returns:
         Tensor: Converted image.
     """
-    if not(_is_pil_image(pic)):
+    if not(F_pil._is_pil_image(pic)):
         raise TypeError('pic should be PIL Image. Got {}'.format(type(pic)))

     if accimage is not None and isinstance(pic, accimage.Image):
@@ -315,7 +321,7 @@ def resize(img, size, interpolation=Image.BILINEAR):
     Returns:
         PIL Image: Resized image.
     """
-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
     if not (isinstance(size, int) or (isinstance(size, Iterable) and len(size) == 2)):
         raise TypeError('Got inappropriate size arg: {}'.format(size))
@@ -374,7 +380,7 @@ def pad(img, padding, fill=0, padding_mode='constant'):
     Returns:
         PIL Image: Padded image.
     """
-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

     if not isinstance(padding, (numbers.Number, tuple)):
@@ -436,23 +442,24 @@ def pad(img, padding, fill=0, padding_mode='constant'):
         return Image.fromarray(img)


-def crop(img, top, left, height, width):
+def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
     """Crop the given PIL Image.

     Args:
-        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+        img (PIL Image or torch.Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
         top (int): Vertical component of the top left corner of the crop box.
         left (int): Horizontal component of the top left corner of the crop box.
         height (int): Height of the crop box.
         width (int): Width of the crop box.

     Returns:
-        PIL Image: Cropped image.
+        PIL Image or torch.Tensor: Cropped image.
     """
-    if not _is_pil_image(img):
-        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

-    return img.crop((left, top, left + width, top + height))
+    if not isinstance(img, torch.Tensor):
+        return F_pil.crop(img, top, left, height, width)
+
+    return F_t.crop(img, top, left, height, width)


 def center_crop(img, output_size):
@@ -491,7 +498,7 @@ def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINEAR):
     Returns:
         PIL Image: Cropped image.
     """
-    assert _is_pil_image(img), 'img should be PIL Image'
+    assert F_pil._is_pil_image(img), 'img should be PIL Image'
     img = crop(img, top, left, height, width)
     img = resize(img, size, interpolation)
     return img
@@ -501,13 +508,13 @@ def hflip(img: Tensor) -> Tensor:
     """Horizontally flip the given PIL Image or torch Tensor.

     Args:
-        img (PIL Image or Torch Tensor): Image to be flipped. If img
+        img (PIL Image or torch.Tensor): Image to be flipped. If img
             is a Tensor, it is expected to be in [..., H, W] format,
             where ... means it can have an arbitrary number of trailing
             dimensions.

     Returns:
-        PIL Image: Horizontally flipped image.
+        PIL Image or torch.Tensor: Horizontally flipped image.
     """
     if not isinstance(img, torch.Tensor):
         return F_pil.hflip(img)
@@ -593,7 +600,7 @@ def perspective(img, startpoints, endpoints, interpolation=Image.BICUBIC, fill=None):
         PIL Image: Perspectively transformed Image.
     """

-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

     opts = _parse_fill(fill, img, '5.0.0')
@@ -797,7 +804,7 @@ def adjust_gamma(img, gamma, gain=1):
             while gamma smaller than 1 make dark regions lighter.
         gain (float): The constant multiplier.
     """
-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

     if gamma < 0:
@@ -837,7 +844,7 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=None):
     .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters

     """
-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

     opts = _parse_fill(fill, img, '5.2.0')
@@ -918,7 +925,7 @@ def affine(img, angle, translate, scale, shear, resample=0, fillcolor=None):
             If omitted, or if the image has mode "1" or "P", it is set to ``PIL.Image.NEAREST``.
         fillcolor (int): Optional fill color for the area outside the transform in the output image. (Pillow>=5.0.0)
     """
-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

     assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
@@ -945,7 +952,7 @@ def to_grayscale(img, num_output_channels=1):

         if num_output_channels = 3 : returned image is 3 channel with r = g = b
     """
-    if not _is_pil_image(img):
+    if not F_pil._is_pil_image(img):
         raise TypeError('img should be PIL Image. Got {}'.format(type(img)))

     if num_output_channels == 1:
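
The pattern here is the one `hflip`/`vflip` already use: the public function dispatches on the input type, sending PIL Images to `F_pil` and Tensors to `F_t`, while the `Tensor` annotations plus the `@torch.jit.unused` decoration on the PIL helpers keep the function scriptable. A hedged usage sketch of the unified `crop` (the setup is mine, not from the diff):

import torch
from PIL import Image
import torchvision.transforms.functional as F

img_tensor = torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8)
img_pil = Image.new("RGB", (32, 32))

# One entry point, two backends.
F.crop(img_tensor, 0, 0, 16, 16)  # dispatches to functional_tensor.crop
F.crop(img_pil, 0, 0, 16, 16)     # dispatches to functional_pil.crop

# Scripting compiles only the tensor path; the PIL branch is invisible to the JIT.
scripted_crop = torch.jit.script(F.crop)
out = scripted_crop(img_tensor, 0, 0, 16, 16)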

torchvision/transforms/functional_pil.py

Lines changed: 30 additions & 1 deletion
@@ -3,18 +3,27 @@
     import accimage
 except ImportError:
     accimage = None
-from PIL import Image, ImageOps, ImageEnhance, __version__ as PILLOW_VERSION
+from PIL import Image, ImageOps, ImageEnhance
 import numpy as np


 @torch.jit.unused
 def _is_pil_image(img):
+    # type: (Any) -> bool
     if accimage is not None:
         return isinstance(img, (Image.Image, accimage.Image))
     else:
         return isinstance(img, Image.Image)


+@torch.jit.unused
+def _get_image_size(img):
+    # type: (Any) -> List[int]
+    if _is_pil_image(img):
+        return img.size
+    raise TypeError("Unexpected type {}".format(type(img)))
+
+
 @torch.jit.unused
 def hflip(img):
     """Horizontally flip the given PIL Image.
@@ -152,3 +161,23 @@ def adjust_hue(img, hue_factor):

     img = Image.merge('HSV', (h, s, v)).convert(input_mode)
     return img
+
+
+@torch.jit.unused
+def crop(img: Image.Image, top: int, left: int, height: int, width: int) -> Image.Image:
+    """Crop the given PIL Image.
+
+    Args:
+        img (PIL Image): Image to be cropped. (0,0) denotes the top left corner of the image.
+        top (int): Vertical component of the top left corner of the crop box.
+        left (int): Horizontal component of the top left corner of the crop box.
+        height (int): Height of the crop box.
+        width (int): Width of the crop box.
+
+    Returns:
+        PIL Image: Cropped image.
+    """
+    if not _is_pil_image(img):
+        raise TypeError('img should be PIL Image. Got {}'.format(type(img)))
+
+    return img.crop((left, top, left + width, top + height))
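
Note the argument shuffle on the last line: PIL's `Image.crop` expects a `(left, upper, right, lower)` box, while the torchvision API speaks `(top, left, height, width)`. For example:

from PIL import Image

img = Image.new("RGB", (10, 10))
# crop(img, top=2, left=3, height=4, width=5) turns into the PIL box (3, 2, 8, 6).
out = img.crop((3, 2, 3 + 5, 2 + 4))
assert out.size == (5, 4)  # PIL reports size as (width, height)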

torchvision/transforms/functional_tensor.py

Lines changed: 12 additions & 2 deletions
@@ -3,8 +3,18 @@
 from torch.jit.annotations import Optional, List, BroadcastingList2, Tuple


-def _is_tensor_a_torch_image(input):
-    return input.ndim >= 2
+@torch.jit.export
+def _is_tensor_a_torch_image(x):
+    # type: (Tensor) -> bool
+    return x.ndim >= 2
+
+
+@torch.jit.export
+def _get_image_size(img):
+    # type: (Tensor) -> List[int]
+    if _is_tensor_a_torch_image(img):
+        return [img.shape[-1], img.shape[-2]]
+    raise TypeError("Unexpected type {}".format(type(img)))


 def vflip(img):