
Commit d226e16 (parent 72b5fd1)

Updates according to the review

5 files changed: +53 −58 lines

torchvision/prototype/transforms/_geometry.py

Lines changed: 33 additions & 29 deletions
@@ -233,6 +233,8 @@ def _check_padding_arg(padding: Union[int, Sequence[int]]) -> None:
         raise ValueError(f"Padding must be an int or a 1, 2, or 4 element tuple, not a {len(padding)} element tuple")
 
 
+# TODO: let's use torchvision._utils.StrEnum to have the best of both worlds (strings and enums)
+# https://github.com/pytorch/vision/issues/6250
 def _check_padding_mode_arg(padding_mode: Literal["constant", "edge", "reflect", "symmetric"]) -> None:
     if padding_mode not in ["constant", "edge", "reflect", "symmetric"]:
         raise ValueError("Padding mode should be either constant, edge, reflect or symmetric")
@@ -437,18 +439,18 @@ def __init__(
 
         self.size = _setup_size(size, error_msg="Please provide only two dimensions (h, w) for size.")
 
-        if padding is not None:
-            _check_padding_arg(padding)
-
-        if (padding is not None) or pad_if_needed:
-            _check_padding_mode_arg(padding_mode)
-            _check_fill_arg(fill)
-
         self.padding = padding
         self.pad_if_needed = pad_if_needed
         self.fill = fill
         self.padding_mode = padding_mode
 
+        self._pad_op = None
+        if self.padding is not None:
+            self._pad_op = Pad(self.padding, fill=self.fill, padding_mode=self.padding_mode)
+
+        if self.pad_if_needed:
+            self._pad_op = Pad(0, fill=self.fill, padding_mode=self.padding_mode)
+
     def _get_params(self, sample: Any) -> Dict[str, Any]:
         image = query_image(sample)
         _, height, width = get_image_dimensions(image)
@@ -466,34 +468,36 @@ def _get_params(self, sample: Any) -> Dict[str, Any]:
         left = torch.randint(0, width - output_width + 1, size=(1,)).item()
         return dict(top=top, left=left, height=output_height, width=output_width)
 
-    def _forward(self, flat_inputs: List[Any]) -> List[Any]:
-        if self.padding is not None:
-            flat_inputs = [F.pad(flat_input, self.padding, self.fill, self.padding_mode) for flat_input in flat_inputs]
-
-        image = query_image(flat_inputs)
-        _, height, width = get_image_dimensions(image)
+    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
+        return F.crop(inpt, **params)
 
-        # pad the width if needed
-        if self.pad_if_needed and width < self.size[1]:
-            padding = [self.size[1] - width, 0]
-            flat_inputs = [F.pad(flat_input, padding, self.fill, self.padding_mode) for flat_input in flat_inputs]
-        # pad the height if needed
-        if self.pad_if_needed and height < self.size[0]:
-            padding = [0, self.size[0] - height]
-            flat_inputs = [F.pad(flat_input, padding, self.fill, self.padding_mode) for flat_input in flat_inputs]
+    def forward(self, *inputs: Any) -> Any:
+        sample = inputs if len(inputs) > 1 else inputs[0]
 
-        params = self._get_params(flat_inputs)
+        if self._pad_op is not None:
+            sample = self._pad_op(sample)
 
-        return [F.crop(flat_input, **params) for flat_input in flat_inputs]
+        image = query_image(sample)
+        _, height, width = get_image_dimensions(image)
 
-    def forward(self, *inputs: Any) -> Any:
-        from torch.utils._pytree import tree_flatten, tree_unflatten
+        if self.pad_if_needed:
+            # This check is to explicitly ensure that self._pad_op is defined
+            if self._pad_op is None:
+                raise RuntimeError(
+                    "Internal error, self._pad_op is None. "
+                    "Please, fill an issue about that on https://github.com/pytorch/vision/issues"
+                )
 
-        sample = inputs if len(inputs) > 1 else inputs[0]
+            # pad the width if needed
+            if width < self.size[1]:
+                self._pad_op.padding = [self.size[1] - width, 0]
+                sample = self._pad_op(sample)
+            # pad the height if needed
+            if height < self.size[0]:
+                self._pad_op.padding = [0, self.size[0] - height]
+                sample = self._pad_op(sample)
 
-        flat_inputs, spec = tree_flatten(sample)
-        out_flat_inputs = self._forward(flat_inputs)
-        return tree_unflatten(out_flat_inputs, spec)
+        return super().forward(sample)
 
 
 class RandomPerspective(_RandomApplyTransform):
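
Note on the RandomCrop change above: the per-input F.pad calls are replaced by a pre-built Pad op whose padding is adjusted when pad_if_needed applies, and the crop itself moves into _transform so the base class handles flattening. Below is a minimal sketch of only the pad-if-needed arithmetic, written against the stable torchvision.transforms.functional API; the image size and the fixed crop origin are made up for illustration and are not part of the commit.

import torch
from torchvision.transforms import functional as F

size = (32, 32)                 # target (height, width) of the crop
img = torch.rand(3, 20, 28)     # image smaller than the target in both dimensions

_, height, width = img.shape
if width < size[1]:             # pad left/right first, mirroring the diff
    img = F.pad(img, [size[1] - width, 0])
if height < size[0]:            # then pad top/bottom
    img = F.pad(img, [0, size[0] - height])

top, left = 0, 0                # RandomCrop._get_params would sample these at random
out = F.crop(img, top, left, size[0], size[1])
assert tuple(out.shape[-2:]) == size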

torchvision/prototype/transforms/_transform.py

Lines changed: 13 additions & 4 deletions
@@ -1,10 +1,11 @@
 import enum
-import functools
 from typing import Any, Dict
 
+import PIL.Image
 import torch
 from torch import nn
-from torchvision.prototype.utils._internal import apply_recursively
+from torch.utils._pytree import tree_flatten, tree_unflatten
+from torchvision.prototype.features import _Feature
 from torchvision.utils import _log_api_usage_once
 
 
@@ -16,12 +17,20 @@ def __init__(self) -> None:
     def _get_params(self, sample: Any) -> Dict[str, Any]:
         return dict()
 
-    def _transform(self, input: Any, params: Dict[str, Any]) -> Any:
+    def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         raise NotImplementedError
 
     def forward(self, *inputs: Any) -> Any:
         sample = inputs if len(inputs) > 1 else inputs[0]
-        return apply_recursively(functools.partial(self._transform, params=self._get_params(sample)), sample)
+
+        params = self._get_params(sample)
+
+        flat_inputs, spec = tree_flatten(sample)
+        transformed_types = (torch.Tensor, _Feature, PIL.Image.Image)
+        flat_outputs = [
+            self._transform(inpt, params) if isinstance(inpt, transformed_types) else inpt for inpt in flat_inputs
+        ]
+        return tree_unflatten(flat_outputs, spec)
 
     def extra_repr(self) -> str:
         extra = []
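
The rewritten Transform.forward above relies on torch.utils._pytree to flatten an arbitrarily nested sample, transform only tensor/feature/PIL leaves, and restore the original structure. A minimal sketch of that flatten/transform/unflatten pattern, with a made-up sample and a dummy stand-in for the real transform:

import torch
from torch.utils._pytree import tree_flatten, tree_unflatten

sample = {"image": torch.rand(3, 4, 4), "label": 7, "ids": ["a", "b"]}
flat, spec = tree_flatten(sample)

# Only tensor leaves are touched here; everything else passes through unchanged.
flat = [x.flip(-1) if isinstance(x, torch.Tensor) else x for x in flat]

out = tree_unflatten(flat, spec)
assert out["label"] == 7 and out["ids"] == ["a", "b"]
assert out["image"].shape == (3, 4, 4)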

torchvision/prototype/transforms/_utils.py

Lines changed: 5 additions & 24 deletions
@@ -1,10 +1,9 @@
-from typing import Any, Iterator, Optional, Tuple, Type, Union
+from typing import Any, Tuple, Type, Union
 
 import PIL.Image
 import torch
 from torch.utils._pytree import tree_flatten
 from torchvision.prototype import features
-from torchvision.prototype.utils._internal import query_recursively
 
 from .functional._meta import get_dimensions_image_pil, get_dimensions_image_tensor
 
@@ -18,22 +17,6 @@ def query_image(sample: Any) -> Union[PIL.Image.Image, torch.Tensor, features.Im
     raise TypeError("No image was found in the sample")
 
 
-# vfdev-5: let's use tree_flatten instead of query_recursively and internal fn to make the code simplier
-def query_image_(sample: Any) -> Union[PIL.Image.Image, torch.Tensor, features.Image]:
-    def fn(
-        id: Tuple[Any, ...], input: Any
-    ) -> Optional[Tuple[Tuple[Any, ...], Union[PIL.Image.Image, torch.Tensor, features.Image]]]:
-        if type(input) == torch.Tensor or isinstance(input, (PIL.Image.Image, features.Image)):
-            return id, input
-
-        return None
-
-    try:
-        return next(query_recursively(fn, sample))[1]
-    except StopIteration:
-        raise TypeError("No image was found in the sample")
-
-
 def get_image_dimensions(image: Union[PIL.Image.Image, torch.Tensor, features.Image]) -> Tuple[int, int, int]:
     if isinstance(image, features.Image):
         channels = image.num_channels
@@ -47,16 +30,14 @@ def get_image_dimensions(image: Union[PIL.Image.Image, torch.Tensor, features.Im
     return channels, height, width
 
 
-def _extract_types(sample: Any) -> Iterator[Type]:
-    return query_recursively(lambda id, input: type(input), sample)
-
-
 def has_any(sample: Any, *types: Type) -> bool:
-    return any(issubclass(type, types) for type in _extract_types(sample))
+    flat_sample, _ = tree_flatten(sample)
+    return any(issubclass(type(obj), types) for obj in flat_sample)
 
 
 def has_all(sample: Any, *types: Type) -> bool:
-    return not bool(set(types) - set(_extract_types(sample)))
+    flat_sample, _ = tree_flatten(sample)
+    return not bool(set(types) - set([type(obj) for obj in flat_sample]))
 
 
 def is_simple_tensor(input: Any) -> bool:
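
One subtlety in the new helpers above: has_any matches via issubclass, so subclasses of the requested types count, while has_all compares exact leaf types through a set difference. A small self-contained sketch of that behavior; the helpers are re-declared here purely for illustration, and the sample is made up:

import torch
from torch.utils._pytree import tree_flatten

def has_any(sample, *types):
    flat_sample, _ = tree_flatten(sample)
    return any(issubclass(type(obj), types) for obj in flat_sample)

def has_all(sample, *types):
    flat_sample, _ = tree_flatten(sample)
    return not bool(set(types) - {type(obj) for obj in flat_sample})

sample = {"image": torch.rand(3, 8, 8), "label": 3}
assert has_any(sample, torch.Tensor) and has_all(sample, torch.Tensor, int)
assert not has_all(sample, torch.Tensor, float)  # no float leaf in the sample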

torchvision/prototype/transforms/functional/_geometry.py

Lines changed: 1 addition & 0 deletions
@@ -814,6 +814,7 @@ def elastic_bounding_box(
     format: features.BoundingBoxFormat,
     displacement: torch.Tensor,
 ) -> torch.Tensor:
+    # TODO: add in docstring about approximation we are doing for grid inversion
     displacement = displacement.to(bounding_box.device)
 
     original_shape = bounding_box.shape

torchvision/transforms/functional_pil.py

Lines changed: 1 addition & 1 deletion
@@ -260,7 +260,7 @@ def _parse_fill(
 ) -> Dict[str, Optional[Union[float, List[float], Tuple[float, ...]]]]:
 
     # Process fill color for affine transforms
-    num_bands = len(img.getbands())
+    num_bands = get_image_num_channels(img)
     if fill is None:
         fill = 0
     if isinstance(fill, (int, float)) and num_bands > 1:
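
For the one-line change above: get_image_num_channels and len(img.getbands()) should agree for PIL inputs, so the swap only routes through the shared helper. A quick check, assuming the public functional API exposes get_image_num_channels as recent torchvision versions do:

import PIL.Image
from torchvision.transforms import functional as F

img = PIL.Image.new("RGB", (4, 4))
assert F.get_image_num_channels(img) == len(img.getbands()) == 3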
