Unified TenCrop and F.ten_crop

vfdev-5 · vfdev-5 · commit 94e29a486f23 · 2020-06-26T14:20:55.000+02:00
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
@@ -195,6 +195,24 @@ def test_five_crop(self):
             "five_crop", "FiveCrop", out_length=5, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
         )
 
+    def test_ten_crop(self):
+        fn_kwargs = meth_kwargs = {"size": (5,)}
+        self._test_geom_op_list_output(
+            "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+        fn_kwargs = meth_kwargs = {"size": [5, ]}
+        self._test_geom_op_list_output(
+            "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+        fn_kwargs = meth_kwargs = {"size": (4, 5)}
+        self._test_geom_op_list_output(
+            "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+        fn_kwargs = meth_kwargs = {"size": [4, 5]}
+        self._test_geom_op_list_output(
+            "ten_crop", "TenCrop", out_length=10, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs
+        )
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py
@@ -636,19 +636,22 @@ def five_crop(img: Tensor, size: List[int]) -> Tuple[Tensor, Tensor, Tensor, Ten
     return tl, tr, bl, br, center
 
 
-def ten_crop(img, size, vertical_flip=False):
-    """Generate ten cropped images from the given PIL Image.
-    Crop the given PIL Image into four corners and the central crop plus the
+def ten_crop(img: Tensor, size: List[int], vertical_flip: bool = False) -> List[Tensor]:
+    """Generate ten cropped images from the given image.
+    Crop the given image into four corners and the central crop plus the
     flipped version of these (horizontal flipping is used by default).
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading dimensions
 
     .. Note::
         This transform returns a tuple of images and there may be a
         mismatch in the number of inputs and targets your ``Dataset`` returns.
 
     Args:
+        img (PIL Image or Tensor): Image to be cropped.
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
-            made.
+            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
         vertical_flip (bool): Use vertical flipping instead of horizontal
 
     Returns:
@@ -658,8 +661,11 @@ def ten_crop(img, size, vertical_flip=False):
     """
     if isinstance(size, numbers.Number):
         size = (int(size), int(size))
-    else:
-        assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+    elif isinstance(size, (tuple, list)) and len(size) == 1:
+        size = (size[0], size[0])
+
+    if len(size) != 2:
+        raise ValueError("Please provide only two dimensions (h, w) for size.")
 
     first_five = five_crop(img, size)
 
diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py
@@ -826,17 +826,20 @@ def forward(self, img):
             img (PIL Image or Tensor): Image to be cropped.
 
         Returns:
-            PIL Image or Tensor: Cropped image.
+            tuple of 5 images. Image can be PIL Image or Tensor
         """
         return F.five_crop(img, self.size)
 
     def __repr__(self):
         return self.__class__.__name__ + '(size={0})'.format(self.size)
 
 
-class TenCrop(object):
-    """Crop the given PIL Image into four corners and the central crop plus the flipped version of
-    these (horizontal flipping is used by default)
+class TenCrop(torch.nn.Module):
+    """Crop the given image into four corners and the central crop plus the flipped version of
+    these (horizontal flipping is used by default).
+    The image can be a PIL Image or a Tensor, in which case it is expected
+    to have [..., H, W] shape, where ... means an arbitrary number of leading
+    dimensions
 
     .. Note::
          This transform returns a tuple of images and there may be a mismatch in the number of
@@ -846,7 +849,7 @@ class TenCrop(object):
     Args:
         size (sequence or int): Desired output size of the crop. If size is an
             int instead of sequence like (h, w), a square crop (size, size) is
-            made.
+            made. If provided a tuple or list of length 1, it will be interpreted as (size[0], size[0]).
         vertical_flip (bool): Use vertical flipping instead of horizontal
 
     Example:
@@ -862,15 +865,26 @@ class TenCrop(object):
     """
 
     def __init__(self, size, vertical_flip=False):
-        self.size = size
+        super().__init__()
         if isinstance(size, numbers.Number):
             self.size = (int(size), int(size))
+        elif isinstance(size, Sequence) and len(size) == 1:
+            self.size = (size[0], size[0])
         else:
-            assert len(size) == 2, "Please provide only two dimensions (h, w) for size."
+            if len(size) != 2:
+                raise ValueError("Please provide only two dimensions (h, w) for size.")
+
             self.size = size
         self.vertical_flip = vertical_flip
 
-    def __call__(self, img):
+    def forward(self, img):
+        """
+        Args:
+            img (PIL Image or Tensor): Image to be cropped.
+
+        Returns:
+            tuple of 10 images. Image can be PIL Image or Tensor
+        """
         return F.ten_crop(img, self.size, self.vertical_flip)
 
     def __repr__(self):