
Make prototype F JIT-scriptable #6584


Merged: 25 commits, Sep 20, 2022

Commits:
136da06  Improve existing low kernel test. (datumbox, Sep 14, 2022)
e5dfa68  Add new midlevel jit-scriptability test (failing). (datumbox, Sep 14, 2022)
6624ab5  Merge branch 'main' into jit/prototype_transforms (datumbox, Sep 16, 2022)
9369c6e  Remove duplicate aliases from kernel tests. (datumbox, Sep 16, 2022)
087e916  Fixing colour kernels. (datumbox, Sep 16, 2022)
f844a5c  Fixing deprecated kernels. (datumbox, Sep 16, 2022)
0b961ef  fix mypy (datumbox, Sep 16, 2022)
f75a0df  Silence mypy instead of fixing to avoid performance penalty (datumbox, Sep 16, 2022)
c286621  Fixing augment kernels. (datumbox, Sep 16, 2022)
1c2822c  Fixing augment meta. (datumbox, Sep 16, 2022)
4a3df83  Remove is_tracing calls. (datumbox, Sep 16, 2022)
663d8c7  Add fake ImageType and DType (datumbox, Sep 16, 2022)
0d38972  Fixing type conversion kernels. (datumbox, Sep 16, 2022)
60000fa  Fixing misc kernels. (datumbox, Sep 16, 2022)
58e5707  partial fix geometry (datumbox, Sep 16, 2022)
1e301b1  Merge branch 'main' into jit/prototype_transforms (datumbox, Sep 16, 2022)
2f37578  Merge branch 'main' into jit/prototype_transforms (datumbox, Sep 17, 2022)
1395149  Remove mutable default from `_pad_with_vector_fill()` + all other unn… (datumbox, Sep 17, 2022)
96f318d  Merge branch 'main' of github.com:pytorch/vision into jit/prototype_t… (vfdev-5, Sep 19, 2022)
87f3567  Fix geometry ops (vfdev-5, Sep 19, 2022)
df53707  Fixing tests (vfdev-5, Sep 19, 2022)
e0185fa  Merge branch 'main' of github.com:pytorch/vision into jit/prototype_t… (vfdev-5, Sep 19, 2022)
3d0e1c9  Merge branch 'main' of github.com:pytorch/vision into jit/prototype_t… (vfdev-5, Sep 19, 2022)
d7e516c  Removed xfail for jit tests on midlevel ops (vfdev-5, Sep 19, 2022)
7c309eb  Merge branch 'main' into jit/prototype_transforms (datumbox, Sep 20, 2022)
23 changes: 15 additions & 8 deletions test/prototype_transforms_dispatcher_infos.py
@@ -113,14 +113,21 @@ def sample_inputs(self, *types):
features.Mask: F.pad_mask,
},
),
DispatcherInfo(
F.perspective,
kernels={
features.Image: F.perspective_image_tensor,
features.BoundingBox: F.perspective_bounding_box,
features.Mask: F.perspective_mask,
},
),
# FIXME:
# RuntimeError: perspective() is missing value for argument 'startpoints'.
# Declaration: perspective(Tensor inpt, int[][] startpoints, int[][] endpoints,
# Enum<__torch__.torchvision.transforms.functional.InterpolationMode> interpolation=Enum<InterpolationMode.BILINEAR>,
# Union(float[], float, int, NoneType) fill=None) -> Tensor
#
# This is probably due to the fact that F.perspective does not have the same signature as F.perspective_image_tensor
Collaborator: Yes, if the signatures actually diverge, then this is the issue. I'm OK with disabling this here for now and looking at it later.

Still, is this something we want? Shouldn't the public kernels be in sync with the dispatcher?

Contributor Author: I believe @vfdev-5 made this change. As discussed previously, there are other places where we need to align the signatures, and this can happen in a follow-up PR to avoid making this one too long.

Personally, I think it's worth aligning the signatures unless there is a good reason not to (perhaps the interpolation default value is one exception?). I'm open to discussing this, and I think we should agree on the policy soon.
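
For context, a minimal hypothetical repro of this class of failure (the stand-in signatures and placeholder bodies below are illustrative, not the real implementations): once the dispatcher is scripted, TorchScript enforces its declared signature, so a call that doesn't supply the dispatcher's required arguments fails exactly as quoted in the FIXME.

```python
from typing import List

import torch


def perspective_image_tensor(inpt: torch.Tensor, perspective_coeffs: List[float]) -> torch.Tensor:
    return inpt  # placeholder: the real kernel warps the image


def perspective(inpt: torch.Tensor, startpoints: List[List[int]], endpoints: List[List[int]]) -> torch.Tensor:
    return inpt  # placeholder: the real dispatcher forwards to a kernel


scripted = torch.jit.script(perspective)
# TorchScript enforces the dispatcher's declared signature, so calling it
# without the required arguments raises:
#   RuntimeError: perspective() is missing value for argument 'startpoints'.
# scripted(torch.rand(3, 8, 8))
```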

# DispatcherInfo(
# F.perspective,
# kernels={
# features.Image: F.perspective_image_tensor,
# features.BoundingBox: F.perspective_bounding_box,
# features.Mask: F.perspective_mask,
# },
# ),
DispatcherInfo(
F.center_crop,
kernels={
23 changes: 19 additions & 4 deletions test/test_prototype_transforms.py
@@ -376,6 +376,9 @@ def test__transform(self, padding, fill, padding_mode, mocker):
inpt = mocker.MagicMock(spec=features.Image)
_ = transform(inpt)

fill = transforms.functional._geometry._convert_fill_arg(fill)
if isinstance(padding, tuple):
padding = list(padding)
fn.assert_called_once_with(inpt, padding=padding, fill=fill, padding_mode=padding_mode)

@pytest.mark.parametrize("fill", [12, {features.Image: 12, features.Mask: 34}])
@@ -389,14 +392,17 @@ def test__transform_image_mask(self, fill, mocker):
_ = transform(inpt)

if isinstance(fill, int):
fill = transforms.functional._geometry._convert_fill_arg(fill)
calls = [
mocker.call(image, padding=1, fill=fill, padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill, padding_mode="constant"),
]
else:
fill_img = transforms.functional._geometry._convert_fill_arg(fill[type(image)])
fill_mask = transforms.functional._geometry._convert_fill_arg(fill[type(mask)])
calls = [
mocker.call(image, padding=1, fill=fill[type(image)], padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill[type(mask)], padding_mode="constant"),
mocker.call(image, padding=1, fill=fill_img, padding_mode="constant"),
mocker.call(mask, padding=1, fill=fill_mask, padding_mode="constant"),
]
fn.assert_has_calls(calls)

@@ -447,6 +453,7 @@ def test__transform(self, fill, side_range, mocker):
torch.rand(1) # random apply changes random state
params = transform._get_params(inpt)

fill = transforms.functional._geometry._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill)

@pytest.mark.parametrize("fill", [12, {features.Image: 12, features.Mask: 34}])
@@ -465,14 +472,17 @@ def test__transform_image_mask(self, fill, mocker):
params = transform._get_params(inpt)

if isinstance(fill, int):
fill = transforms.functional._geometry._convert_fill_arg(fill)
calls = [
mocker.call(image, **params, fill=fill),
mocker.call(mask, **params, fill=fill),
]
else:
fill_img = transforms.functional._geometry._convert_fill_arg(fill[type(image)])
fill_mask = transforms.functional._geometry._convert_fill_arg(fill[type(mask)])
calls = [
mocker.call(image, **params, fill=fill[type(image)]),
mocker.call(mask, **params, fill=fill[type(mask)]),
mocker.call(image, **params, fill=fill_img),
mocker.call(mask, **params, fill=fill_mask),
]
fn.assert_has_calls(calls)

@@ -533,6 +543,7 @@ def test__transform(self, degrees, expand, fill, center, mocker):
torch.manual_seed(12)
params = transform._get_params(inpt)

fill = transforms.functional._geometry._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center)

@pytest.mark.parametrize("angle", [34, -87])
@@ -670,6 +681,7 @@ def test__transform(self, degrees, translate, scale, shear, fill, center, mocker
torch.manual_seed(12)
params = transform._get_params(inpt)

fill = transforms.functional._geometry._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center)


@@ -917,6 +929,7 @@ def test__transform(self, distortion_scale, mocker):
torch.rand(1) # random apply changes random state
params = transform._get_params(inpt)

fill = transforms.functional._geometry._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)


@@ -986,6 +999,7 @@ def test__transform(self, alpha, sigma, mocker):
transform._get_params = mocker.MagicMock()
_ = transform(inpt)
params = transform._get_params(inpt)
fill = transforms.functional._geometry._convert_fill_arg(fill)
fn.assert_called_once_with(inpt, **params, fill=fill, interpolation=interpolation)


@@ -1609,6 +1623,7 @@ def test__transform(self, mocker, needs):
if not needs_crop:
assert args[0] is inpt_sentinel
assert args[1] is padding_sentinel
fill_sentinel = transforms.functional._geometry._convert_fill_arg(fill_sentinel)
assert kwargs == dict(fill=fill_sentinel, padding_mode=padding_mode_sentinel)
else:
mock_pad.assert_not_called()
1 change: 0 additions & 1 deletion test/test_prototype_transforms_dispatchers.py
@@ -9,7 +9,6 @@


class TestCommon:
@pytest.mark.xfail(reason="dispatchers are currently not scriptable")
Collaborator: 🎉

@pytest.mark.parametrize(
("info", "args_kwargs"),
[
77 changes: 62 additions & 15 deletions test/test_prototype_transforms_functional.py
@@ -407,27 +407,74 @@ def erase_image_tensor():
yield ArgsKwargs(image, i=1, j=2, h=6, w=7, v=torch.rand(c, 6, 7))


_KERNEL_TYPES = {"_image_tensor", "_image_pil", "_mask", "_bounding_box", "_label"}


def _distinct_callables(callable_names):
# Ensure we deduplicate callables (due to aliases) without losing the names on the new API
remove = set()
distinct = set()
for name in callable_names:
item = F.__dict__[name]
if item not in distinct:
distinct.add(item)
else:
remove.add(name)
callable_names -= remove

# create tuple and sort by name
return sorted([(name, F.__dict__[name]) for name in callable_names], key=lambda t: t[0])


def _get_distinct_kernels():
kernel_names = {
name
for name, f in F.__dict__.items()
if callable(f) and not name.startswith("_") and any(name.endswith(k) for k in _KERNEL_TYPES)
}
return _distinct_callables(kernel_names)


def _get_distinct_midlevels():
midlevel_names = {
name
for name, f in F.__dict__.items()
if callable(f) and not name.startswith("_") and not any(name.endswith(k) for k in _KERNEL_TYPES)
}
return _distinct_callables(midlevel_names)


@pytest.mark.parametrize(
"kernel",
[
pytest.param(kernel, id=name)
for name, kernel in F.__dict__.items()
if not name.startswith("_")
and callable(kernel)
and any(feature_type in name for feature_type in {"image", "mask", "bounding_box", "label"})
and "pil" not in name
and name
for name, kernel in _get_distinct_kernels()
if not name.endswith("_image_pil") and name not in {"to_image_tensor"}
],
)
def test_scriptable_kernel(kernel):
jit.script(kernel) # TODO: pass data through it


@pytest.mark.parametrize(
"midlevel",
[
pytest.param(midlevel, id=name)
for name, midlevel in _get_distinct_midlevels()
if name
not in {
"to_image_tensor",
"get_num_channels",
"get_spatial_size",
"get_image_num_channels",
"get_image_size",
"InterpolationMode",
"decode_image_with_pil",
"decode_video_with_av",
"pil_to_tensor",
"to_grayscale",
"to_pil_image",
"to_tensor",
}
],
)
def test_scriptable(kernel):
jit.script(kernel)
def test_scriptable_midlevel(midlevel):
jit.script(midlevel) # TODO: pass data through it
Comment on lines +476 to +477

Collaborator: @vfdev-5 Isn't that superseded by the tests I've added?

Contributor Author: Maybe? I noticed that, at one point early in the development, one of the kernels had a return type of Any and this test didn't complain. This is why, to be safe, we usually take the strategy of:

  1. JIT-scripting.
  2. Passing data through the kernel and comparing it with the non-JIT version.
  3. Serializing/deserializing the method and confirming it still returns the right value.

See _check_jit_scriptable() from test_models.py for more info.

Collaborator: @datumbox in test/test_prototype_transforms_dispatchers.py the test should script and execute the mid-level op, so I think we can remove this test_scriptable_midlevel.

Collaborator: I'll clean up.
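
For reference, a minimal sketch of the three-step strategy described above; this is an illustrative helper, not the actual _check_jit_scriptable() from test_models.py:

```python
import io

import torch


def check_jit_roundtrip(fn, *args, **kwargs):
    # 1. JIT-scripting must succeed.
    scripted = torch.jit.script(fn)

    # 2. Pass data through and compare with the eager (non-JIT) version.
    expected = fn(*args, **kwargs)
    torch.testing.assert_close(scripted(*args, **kwargs), expected)

    # 3. Serialize/deserialize and confirm it still returns the right value.
    buffer = io.BytesIO()
    torch.jit.save(scripted, buffer)
    buffer.seek(0)
    reloaded = torch.jit.load(buffer)
    torch.testing.assert_close(reloaded(*args, **kwargs), expected)
```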



# Test below is intended to test mid-level op vs low-level ops it calls
@@ -439,8 +486,8 @@ def test_scriptable(kernel):
[
pytest.param(func, id=name)
for name, func in F.__dict__.items()
if not name.startswith("_")
and callable(func)
if not name.startswith("_") and callable(func)
# TODO: remove aliases
and all(feature_type not in name for feature_type in {"image", "mask", "bounding_box", "label", "pil"})
and name
not in {
4 changes: 2 additions & 2 deletions torchvision/prototype/features/__init__.py
@@ -1,6 +1,6 @@
from ._bounding_box import BoundingBox, BoundingBoxFormat
from ._encoded import EncodedData, EncodedImage, EncodedVideo
from ._feature import _Feature, is_simple_tensor
from ._image import ColorSpace, Image
from ._feature import _Feature, DType, is_simple_tensor
from ._image import ColorSpace, Image, ImageType
from ._label import Label, OneHotLabel
from ._mask import Mask
5 changes: 5 additions & 0 deletions torchvision/prototype/features/_feature.py
@@ -10,6 +10,11 @@
F = TypeVar("F", bound="_Feature")


# Due to torch.jit.script limitation we keep DType as torch.Tensor
# instead of Union[torch.Tensor, PIL.Image.Image, features._Feature]
DType = torch.Tensor


def is_simple_tensor(inpt: Any) -> bool:
return isinstance(inpt, torch.Tensor) and not isinstance(inpt, _Feature)
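
The aliasing works because TorchScript resolves annotations at scripting time, while a Union that includes PIL.Image.Image would fail, since PIL types are not scriptable. A minimal sketch of the pattern (the kernel below is illustrative, not part of this PR):

```python
import torch
from torchvision.prototype import features


def horizontal_flip_sketch(inpt: features.DType) -> features.DType:
    # DType is torch.Tensor under the hood, so this scripts cleanly while
    # still accepting tensor subclasses such as features.Image at runtime.
    return inpt.flip(-1)


scripted = torch.jit.script(horizontal_flip_sketch)
```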

51 changes: 31 additions & 20 deletions torchvision/prototype/features/_image.py
@@ -12,6 +12,11 @@
from ._feature import _Feature


# Due to torch.jit.script limitation we keep ImageType as torch.Tensor
# instead of Union[torch.Tensor, PIL.Image.Image, features.Image]
ImageType = torch.Tensor


class ColorSpace(StrEnum):
OTHER = StrEnum.auto()
GRAY = StrEnum.auto()
@@ -32,6 +37,31 @@ def from_pil_mode(cls, mode: str) -> ColorSpace:
else:
return cls.OTHER

@staticmethod
def from_tensor_shape(shape: List[int]) -> ColorSpace:
Contributor Author: I think this makes more sense on the Enum than as a method on Image. In any case, it was moved because JIT didn't like it as a classmethod. We don't have to keep it in the enum; we just need to have it as a standalone method.

Collaborator: So we can't even have classmethods on objects that are not covered by JIT? 🤯

Collaborator: Plus, I intentionally put the term "guess" in the name, since the number of channels is not sufficient to pick the right colorspace. For example, CMYK also has 4 channels but would be classified as RGBA. However, this is not a problem now, since we don't support it yet and maybe never will.

Contributor Author: I think the issue is that the specific class is definitely not JIT-scriptable. JIT complains about the tensor_contents: Any = None value on __repr__. Perhaps we can remove this?

I'm happy to make any changes to the name, or to try a different approach with JIT. Let me wrap up the rest of the kernels to see where we are, and we can try a couple of options.

Collaborator: If it works as is, I wouldn't mess with this class any further. If it is needed, we can remove the : Any though.

Contributor Author: @pmeier I'm also really flexible. I don't have context on why tensor_contents was introduced and what it's supposed to be. Would you like to send a PR once this is merged, to see if you can make it work in the original location?
return _from_tensor_shape(shape)


def _from_tensor_shape(shape: List[int]) -> ColorSpace:
# Needed as a standalone method for JIT
ndim = len(shape)
if ndim < 2:
return ColorSpace.OTHER
elif ndim == 2:
return ColorSpace.GRAY

num_channels = shape[-3]
if num_channels == 1:
return ColorSpace.GRAY
elif num_channels == 2:
return ColorSpace.GRAY_ALPHA
elif num_channels == 3:
return ColorSpace.RGB
elif num_channels == 4:
return ColorSpace.RGB_ALPHA
else:
return ColorSpace.OTHER
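
A quick usage sketch of the relocated helper: the enum's staticmethod delegates to the scriptable free function above, so eager callers keep the old ergonomics.

```python
import torch
from torchvision.prototype.features import ColorSpace

# A CHW tensor with 3 channels is classified as RGB.
space = ColorSpace.from_tensor_shape(list(torch.rand(3, 32, 32).shape))
assert space == ColorSpace.RGB
```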


class Image(_Feature):
color_space: ColorSpace
@@ -53,7 +83,7 @@ def __new__(
image = super().__new__(cls, data, requires_grad=requires_grad)

if color_space is None:
color_space = cls.guess_color_space(image)
color_space = ColorSpace.from_tensor_shape(image.shape) # type: ignore[arg-type]
if color_space == ColorSpace.OTHER:
warnings.warn("Unable to guess a specific color space. Consider passing it explicitly.")
elif isinstance(color_space, str):
@@ -83,25 +113,6 @@ def image_size(self) -> Tuple[int, int]:
def num_channels(self) -> int:
return self.shape[-3]

@staticmethod
def guess_color_space(data: torch.Tensor) -> ColorSpace:
if data.ndim < 2:
return ColorSpace.OTHER
elif data.ndim == 2:
return ColorSpace.GRAY

num_channels = data.shape[-3]
if num_channels == 1:
return ColorSpace.GRAY
elif num_channels == 2:
return ColorSpace.GRAY_ALPHA
elif num_channels == 3:
return ColorSpace.RGB
elif num_channels == 4:
return ColorSpace.RGB_ALPHA
else:
return ColorSpace.OTHER

def to_color_space(self, color_space: Union[str, ColorSpace], copy: bool = True) -> Image:
if isinstance(color_space, str):
color_space = ColorSpace.from_str(color_space.upper())
5 changes: 2 additions & 3 deletions torchvision/prototype/transforms/_auto_augment.py
@@ -72,11 +72,10 @@ def _apply_image_transform(

# Fill = 0 is not equivalent to None, https://github.com/pytorch/vision/issues/6517
# So, we have to put fill as None if fill == 0
fill_: Optional[Union[int, float, Sequence[int], Sequence[float]]]
# This is due to BC with stable API which has fill = None by default
fill_ = F._geometry._convert_fill_arg(fill)
Contributor Author: We might want to consider making _convert_fill_arg() public in the future, as it is regularly used in Transforms as a utility method.
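
(For reference, a minimal sketch of what such a fill-conversion helper might look like, inferred from its call sites in this PR; this is not the actual implementation.)

```python
from typing import List, Optional, Sequence, Union


def _convert_fill_arg_sketch(
    fill: Optional[Union[int, float, Sequence[int], Sequence[float]]]
) -> Optional[List[float]]:
    # Normalize the user-facing fill argument to the single concrete type
    # (Optional[List[float]]) that the JIT-scriptable kernels expect.
    if fill is None:
        return None
    if isinstance(fill, (int, float)):
        return [float(fill)]
    return [float(v) for v in fill]
```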

if isinstance(fill, int) and fill == 0:
fill_ = None
else:
fill_ = fill

if transform_id == "Identity":
return image