
Commit b356e8b
Improved tests and docs
1 parent 01175db

5 files changed: +111 -69 lines


test/test_transforms_tensor.py (43 additions, 5 deletions)

@@ -101,11 +101,28 @@ def test_pad(self):
 
     def test_crop(self):
         fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5}
-        meth_kwargs = {"size": (4, 5), "padding": [4, ], "pad_if_needed": True, }
+        # Test transforms.RandomCrop with size and padding as tuple
+        meth_kwargs = {"size": (4, 5), "padding": (4, 4), "pad_if_needed": True, }
         self._test_geom_op(
             'crop', 'RandomCrop', fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
 
+        tensor = torch.randint(0, 255, (3, 10, 10), dtype=torch.uint8)
+        # Test torchscript of transforms.RandomCrop with size as int
+        f = T.RandomCrop(size=5)
+        scripted_fn = torch.jit.script(f)
+        scripted_fn(tensor)
+
+        # Test torchscript of transforms.RandomCrop with size as [int, ]
+        f = T.RandomCrop(size=[5, ], padding=[2, ])
+        scripted_fn = torch.jit.script(f)
+        scripted_fn(tensor)
+
+        # Test torchscript of transforms.RandomCrop with size as list
+        f = T.RandomCrop(size=[6, 6])
+        scripted_fn = torch.jit.script(f)
+        scripted_fn(tensor)
+
     def test_center_crop(self):
         fn_kwargs = {"output_size": (4, 5)}
         meth_kwargs = {"size": (4, 5), }

@@ -117,6 +134,21 @@ def test_center_crop(self):
         self._test_geom_op(
             "center_crop", "CenterCrop", fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
+        tensor = torch.randint(0, 255, (3, 10, 10), dtype=torch.uint8)
+        # Test torchscript of transforms.CenterCrop with size as int
+        f = T.CenterCrop(size=5)
+        scripted_fn = torch.jit.script(f)
+        scripted_fn(tensor)
+
+        # Test torchscript of transforms.CenterCrop with size as [int, ]
+        f = T.CenterCrop(size=[5, ])
+        scripted_fn = torch.jit.script(f)
+        scripted_fn(tensor)
+
+        # Test torchscript of transforms.CenterCrop with size as tuple
+        f = T.CenterCrop(size=(6, 6))
+        scripted_fn = torch.jit.script(f)
+        scripted_fn(tensor)
 
     def _test_geom_op_list_output(self, func, method, out_length, fn_kwargs=None, meth_kwargs=None):
         if fn_kwargs is None:

@@ -146,13 +178,19 @@ def _test_geom_op_list_output(self, func, method, out_length, fn_kwargs=None, me
         self.assertEqual(len(output), len(transformed_t_list_script))
 
     def test_five_crop(self):
-        fn_kwargs = {"size": (5,)}
-        meth_kwargs = {"size": (5, )}
+        fn_kwargs = meth_kwargs = {"size": (5,)}
+        self._test_geom_op_list_output(
+            "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+        fn_kwargs = meth_kwargs = {"size": [5, ]}
+        self._test_geom_op_list_output(
+            "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+        fn_kwargs = meth_kwargs = {"size": (4, 5)}
         self._test_geom_op_list_output(
             "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
        )
-        fn_kwargs = {"size": (4, 5)}
-        meth_kwargs = {"size": (4, 5)}
+        fn_kwargs = meth_kwargs = {"size": [4, 5]}
         self._test_geom_op_list_output(
             "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
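
The new torchscript checks above can be reproduced outside the test harness. A minimal standalone sketch, assuming a torchvision build that includes this commit (where the crop transforms accept tensors and script cleanly):

import torch
import torchvision.transforms as T

# uint8 image tensor in [C, H, W] layout, matching the tests above
tensor = torch.randint(0, 255, (3, 10, 10), dtype=torch.uint8)

# Scripting compiles the transform's forward; the call then runs under torchscript
scripted_fn = torch.jit.script(T.RandomCrop(size=5))
out = scripted_fn(tensor)
print(out.shape)  # torch.Size([3, 5, 5])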

torchvision/transforms/functional.py (20 additions, 17 deletions)

@@ -395,7 +395,10 @@ def pad(img: Tensor, padding: List[int], fill: int = 0, padding_mode: str = "con
 
 
 def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
-    """Crop the given PIL Image.
+    """Crop the given image at specified location and output size.
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading
+    dimensions
 
     Args:
         img (PIL Image or Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.

@@ -416,13 +419,13 @@ def crop(img: Tensor, top: int, left: int, height: int, width: int) -> Tensor:
 
 def center_crop(img: Tensor, output_size: List[int]) -> Tensor:
     """Crops the given image at the center.
-    The image can be a PIL Image or a torch Tensor, in which case it is expected
+    The image can be a PIL Image or a Tensor, in which case it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
 
     Args:
         img (PIL Image or Tensor): Image to be cropped.
         output_size (sequence or int): (height, width) of the crop box. If int or sequence with single int
-            it is used for both directions
+            it is used for both directions.
 
     Returns:
         PIL Image or Tensor: Cropped image.

@@ -469,7 +472,7 @@ def resized_crop(img, top, left, height, width, size, interpolation=Image.BILINE
 
 
 def hflip(img: Tensor) -> Tensor:
-    """Horizontally flip the given PIL Image or torch Tensor.
+    """Horizontally flip the given PIL Image or Tensor.
 
     Args:
         img (PIL Image or Tensor): Image to be flipped. If img

@@ -531,8 +534,7 @@ def _get_perspective_coeffs(startpoints, endpoints):
 
     Args:
         List containing [top-left, top-right, bottom-right, bottom-left] of the original image,
-        List containing [top-left, top-right, bottom-right, bottom-left] of the transformed
-        image
+        List containing [top-left, top-right, bottom-right, bottom-left] of the transformed image
     Returns:
         octuple (a, b, c, d, e, f, g, h) for transforming each pixel.
     """

@@ -577,7 +579,7 @@ def vflip(img: Tensor) -> Tensor:
     """Vertically flip the given PIL Image or torch Tensor.
 
     Args:
-        img (PIL Image or Torch Tensor): Image to be flipped. If img
+        img (PIL Image or Tensor): Image to be flipped. If img
             is a Tensor, it is expected to be in [..., H, W] format,
             where ... means it can have an arbitrary number of trailing
             dimensions.

@@ -593,17 +595,18 @@ def vflip(img: Tensor) -> Tensor:
 
 def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Tensor, Tensor]:
     """Crop the given image into four corners and the central crop.
-    The image can be a PIL Image or a torch Tensor, in which case it is expected
+    The image can be a PIL Image or a Tensor, in which case it is expected
     to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
 
     .. Note::
         This transform returns a tuple of images and there may be a
         mismatch in the number of inputs and targets your ``Dataset`` returns.
 
     Args:
-        size (sequence or int): Desired output size of the crop. If size is an
-            int instead of sequence like (h, w), a square crop (size, size) is
-            made.
+        img (PIL Image or Tensor): Image to be cropped.
+        size (sequence or int): Desired output size of the crop. If size is an
+            int instead of sequence like (h, w), a square crop (size, size) is
+            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
 
     Returns:
         tuple: tuple (tl, tr, bl, br, center)

@@ -673,13 +676,13 @@ def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
     """Adjust brightness of an Image.
 
     Args:
-        img (PIL Image or Torch Tensor): Image to be adjusted.
+        img (PIL Image or Tensor): Image to be adjusted.
         brightness_factor (float): How much to adjust the brightness. Can be
             any non negative number. 0 gives a black image, 1 gives the
             original image while 2 increases the brightness by a factor of 2.
 
     Returns:
-        PIL Image or Torch Tensor: Brightness adjusted image.
+        PIL Image or Tensor: Brightness adjusted image.
     """
     if not isinstance(img, torch.Tensor):
         return F_pil.adjust_brightness(img, brightness_factor)

@@ -691,13 +694,13 @@ def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
     """Adjust contrast of an Image.
 
     Args:
-        img (PIL Image or Torch Tensor): Image to be adjusted.
+        img (PIL Image or Tensor): Image to be adjusted.
         contrast_factor (float): How much to adjust the contrast. Can be any
             non negative number. 0 gives a solid gray image, 1 gives the
             original image while 2 increases the contrast by a factor of 2.
 
     Returns:
-        PIL Image or Torch Tensor: Contrast adjusted image.
+        PIL Image or Tensor: Contrast adjusted image.
     """
     if not isinstance(img, torch.Tensor):
         return F_pil.adjust_contrast(img, contrast_factor)

@@ -709,13 +712,13 @@ def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
     """Adjust color saturation of an image.
 
     Args:
-        img (PIL Image or Torch Tensor): Image to be adjusted.
+        img (PIL Image or Tensor): Image to be adjusted.
         saturation_factor (float): How much to adjust the saturation. 0 will
             give a black and white image, 1 will give the original image while
             2 will enhance the saturation by a factor of 2.
 
     Returns:
-        PIL Image or Torch Tensor: Saturation adjusted image.
+        PIL Image or Tensor: Saturation adjusted image.
     """
     if not isinstance(img, torch.Tensor):
         return F_pil.adjust_saturation(img, saturation_factor)
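
The docstring updates above codify that these functionals accept either a PIL Image or a Tensor in [..., H, W] layout. A short sketch of what the new wording permits; the batched input is an assumption drawn from the "..." in the documented shape, not a case this diff tests directly:

import torch
import torchvision.transforms.functional as F

batch = torch.rand(2, 3, 32, 32)  # [N, C, H, W]; leading dims pass through unchanged
patch = F.crop(batch, top=4, left=4, height=16, width=16)
print(patch.shape)  # torch.Size([2, 3, 16, 16])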

torchvision/transforms/functional_pil.py (5 additions, 6 deletions)

@@ -1,4 +1,5 @@
 import numbers
+from typing import Any, List
 
 import torch
 try:

@@ -10,24 +11,22 @@
 
 
 @torch.jit.unused
-def _is_pil_image(img):
-    # type: (Any) -> bool
+def _is_pil_image(img: Any) -> bool:
     if accimage is not None:
         return isinstance(img, (Image.Image, accimage.Image))
     else:
         return isinstance(img, Image.Image)
 
 
 @torch.jit.unused
-def _get_image_size(img):
-    # type: (Any) -> List[int]
+def _get_image_size(img: Any) -> List[int]:
     if _is_pil_image(img):
         return img.size
     raise TypeError("Unexpected type {}".format(type(img)))
 
 
 @torch.jit.unused
-def hflip(img):
+def hflip(img: Any):
     """Horizontally flip the given PIL Image.
 
     Args:

@@ -43,7 +42,7 @@ def hflip(img):
 
 
 @torch.jit.unused
-def vflip(img):
+def vflip(img: Any):
     """Vertically flip the given PIL Image.
 
     Args:
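
The pattern in this file (mypy-style "# type:" comments replaced by real annotations, with PIL-only helpers kept behind @torch.jit.unused) can be tried in isolation. A sketch with a hypothetical helper, not code from this commit:

from typing import Any

import torch

@torch.jit.unused
def _pil_only_helper(img: Any) -> bool:
    # torch.jit.script skips compiling this body, so the PIL types used
    # inside never need to be expressible in torchscript.
    from PIL import Image
    return isinstance(img, Image.Image)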

torchvision/transforms/functional_tensor.py (18 additions, 23 deletions)

@@ -49,7 +49,7 @@ def crop(img: Tensor, top: int, left: int, height: int, width: int):
     """Crop the given Image Tensor.
 
     Args:
-        img (Tensor): Image to be cropped in the form [C, H, W]. (0,0) denotes the top left corner of the image.
+        img (Tensor): Image to be cropped in the form [..., H, W]. (0,0) denotes the top left corner of the image.
         top (int): Vertical component of the top left corner of the crop box.
         left (int): Horizontal component of the top left corner of the crop box.
         height (int): Height of the crop box.

@@ -64,8 +64,7 @@ def crop(img: Tensor, top: int, left: int, height: int, width: int):
     return img[..., top:top + height, left:left + width]
 
 
-def rgb_to_grayscale(img):
-    # type: (Tensor) -> Tensor
+def rgb_to_grayscale(img: Tensor) -> Tensor:
     """Convert the given RGB Image Tensor to Grayscale.
     For RGB to Grayscale conversion, ITU-R 601-2 luma transform is performed which
     is L = R * 0.2989 + G * 0.5870 + B * 0.1140

@@ -83,8 +82,7 @@ def rgb_to_grayscale(img):
     return (0.2989 * img[0] + 0.5870 * img[1] + 0.1140 * img[2]).to(img.dtype)
 
 
-def adjust_brightness(img, brightness_factor):
-    # type: (Tensor, float) -> Tensor
+def adjust_brightness(img: Tensor, brightness_factor: float) -> Tensor:
     """Adjust brightness of an RGB image.
 
     Args:

@@ -102,8 +100,7 @@ def adjust_brightness(img, brightness_factor):
     return _blend(img, torch.zeros_like(img), brightness_factor)
 
 
-def adjust_contrast(img, contrast_factor):
-    # type: (Tensor, float) -> Tensor
+def adjust_contrast(img: Tensor, contrast_factor: float) -> Tensor:
     """Adjust contrast of an RGB image.
 
     Args:

@@ -171,8 +168,7 @@ def adjust_hue(img, hue_factor):
     return img_hue_adj
 
 
-def adjust_saturation(img, saturation_factor):
-    # type: (Tensor, float) -> Tensor
+def adjust_saturation(img: Tensor, saturation_factor: float) -> Tensor:
     """Adjust color saturation of an RGB image.
 
     Args:

@@ -190,12 +186,11 @@ def adjust_saturation(img, saturation_factor):
     return _blend(img, rgb_to_grayscale(img), saturation_factor)
 
 
-def center_crop(img, output_size):
-    # type: (Tensor, BroadcastingList2[int]) -> Tensor
+def center_crop(img: Tensor, output_size: BroadcastingList2[int]) -> Tensor:
     """Crop the Image Tensor and resize it to desired size.
 
     Args:
-        img (Tensor): Image to be cropped. (0,0) denotes the top left corner of the image.
+        img (Tensor): Image to be cropped.
         output_size (sequence or int): (height, width) of the crop box. If int,
             it is used for both directions
 

@@ -213,17 +208,17 @@ def center_crop(img, output_size):
     return crop(img, crop_top, crop_left, crop_height, crop_width)
 
 
-def five_crop(img, size):
-    # type: (Tensor, BroadcastingList2[int]) -> List[Tensor]
+def five_crop(img: Tensor, size: BroadcastingList2[int]) -> List[Tensor]:
     """Crop the given Image Tensor into four corners and the central crop.
     .. Note::
         This transform returns a List of Tensors and there may be a
         mismatch in the number of inputs and targets your ``Dataset`` returns.
 
     Args:
-        size (sequence or int): Desired output size of the crop. If size is an
-            int instead of sequence like (h, w), a square crop (size, size) is
-            made.
+        img (Tensor): Image to be cropped.
+        size (sequence or int): Desired output size of the crop. If size is an
+            int instead of sequence like (h, w), a square crop (size, size) is
+            made.
 
     Returns:
         List: List (tl, tr, bl, br, center)

@@ -249,19 +244,20 @@ def five_crop(img, size):
     return [tl, tr, bl, br, center]
 
 
-def ten_crop(img, size, vertical_flip=False):
-    # type: (Tensor, BroadcastingList2[int], bool) -> List[Tensor]
+def ten_crop(img: Tensor, size: BroadcastingList2[int], vertical_flip: bool = False) -> List[Tensor]:
     """Crop the given Image Tensor into four corners and the central crop plus the
     flipped version of these (horizontal flipping is used by default).
+
     .. Note::
         This transform returns a List of images and there may be a
         mismatch in the number of inputs and targets your ``Dataset`` returns.
 
     Args:
-        size (sequence or int): Desired output size of the crop. If size is an
+        img (Tensor): Image to be cropped.
+        size (sequence or int): Desired output size of the crop. If size is an
            int instead of sequence like (h, w), a square crop (size, size) is
            made.
-       vertical_flip (bool): Use vertical flipping instead of horizontal
+        vertical_flip (bool): Use vertical flipping instead of horizontal
 
     Returns:
         List: List (tl, tr, bl, br, center, tl_flip, tr_flip, bl_flip, br_flip, center_flip)

@@ -284,8 +280,7 @@ def ten_crop(img, size, vertical_flip=False):
     return first_five + second_five
 
 
-def _blend(img1, img2, ratio):
-    # type: (Tensor, Tensor, float) -> Tensor
+def _blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor:
     bound = 1 if img1.dtype in [torch.half, torch.float32, torch.float64] else 255
     return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype)
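
The same migration from type comments to Python 3 annotations runs through this file; with real annotations, torch.jit.script can read signatures directly instead of parsing "# type:" comments. A minimal sketch mirroring the _blend helper above (a standalone re-implementation under that assumption, not the module's own code):

import torch
from torch import Tensor

def blend(img1: Tensor, img2: Tensor, ratio: float) -> Tensor:
    # Clamp bound depends on dtype: 1 for float images, 255 for uint8
    bound = 1 if img1.dtype in [torch.half, torch.float32, torch.float64] else 255
    return (ratio * img1 + (1 - ratio) * img2).clamp(0, bound).to(img1.dtype)

scripted = torch.jit.script(blend)  # compiles from the annotations alone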