
Commit 0e7ab27

Merge branch 'main' into raft-reference-improvement

2 parents: 2857e21 + 7be2f55

21 files changed, +393 -67 lines

README.rst

Lines changed: 3 additions & 1 deletion

@@ -21,7 +21,9 @@ supported Python versions.
 +--------------------------+--------------------------+---------------------------------+
 | ``torch``                | ``torchvision``          | ``python``                      |
 +==========================+==========================+=================================+
-| ``main`` / ``nightly``   | ``main`` / ``nightly``   | ``>=3.7``, ``<=3.9``            |
+| ``main`` / ``nightly``   | ``main`` / ``nightly``   | ``>=3.7``, ``<=3.10``           |
++--------------------------+--------------------------+---------------------------------+
+| ``1.11.0``               | ``0.12.0``               | ``>=3.7``, ``<=3.10``           |
 +--------------------------+--------------------------+---------------------------------+
 | ``1.10.2``               | ``0.11.3``               | ``>=3.6``, ``<=3.9``            |
 +--------------------------+--------------------------+---------------------------------+
gallery/plot_visualization_utils.py

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@
 def show(imgs):
     if not isinstance(imgs, list):
         imgs = [imgs]
-    fix, axs = plt.subplots(ncols=len(imgs), squeeze=False)
+    fig, axs = plt.subplots(ncols=len(imgs), squeeze=False)
     for i, img in enumerate(imgs):
         img = img.detach()
         img = F.to_pil_image(img)
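
Note: the rename fixes a misleading variable name; the figure handle was unused either way. For context, a minimal self-contained sketch of the fixed helper. The plotting calls after the subplots line are assumed from the rest of the gallery script and are not part of this diff:

import torch
import numpy as np
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F

def show(imgs):
    # Accept a single image tensor or a list of image tensors.
    if not isinstance(imgs, list):
        imgs = [imgs]
    fig, axs = plt.subplots(ncols=len(imgs), squeeze=False)
    for i, img in enumerate(imgs):
        img = img.detach()
        img = F.to_pil_image(img)
        # Assumed continuation: draw each image and hide the axis ticks.
        axs[0, i].imshow(np.asarray(img))
        axs[0, i].set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])

show(torch.randint(0, 256, (3, 32, 32), dtype=torch.uint8))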

references/detection/transforms.py

Lines changed: 112 additions & 1 deletion

@@ -1,4 +1,4 @@
-from typing import List, Tuple, Dict, Optional
+from typing import List, Tuple, Dict, Optional, Union

 import torch
 import torchvision
@@ -326,3 +326,114 @@ def forward(
         )

         return image, target
+
+
+class FixedSizeCrop(nn.Module):
+    def __init__(self, size, fill=0, padding_mode="constant"):
+        super().__init__()
+        size = tuple(T._setup_size(size, error_msg="Please provide only two dimensions (h, w) for size."))
+        self.crop_height = size[0]
+        self.crop_width = size[1]
+        self.fill = fill  # TODO: Fill is currently respected only on PIL. Apply tensor patch.
+        self.padding_mode = padding_mode
+
+    def _pad(self, img, target, padding):
+        # Taken from the functional_tensor.py pad
+        if isinstance(padding, int):
+            pad_left = pad_right = pad_top = pad_bottom = padding
+        elif len(padding) == 1:
+            pad_left = pad_right = pad_top = pad_bottom = padding[0]
+        elif len(padding) == 2:
+            pad_left = pad_right = padding[0]
+            pad_top = pad_bottom = padding[1]
+        else:
+            pad_left = padding[0]
+            pad_top = padding[1]
+            pad_right = padding[2]
+            pad_bottom = padding[3]
+
+        padding = [pad_left, pad_top, pad_right, pad_bottom]
+        img = F.pad(img, padding, self.fill, self.padding_mode)
+        if target is not None:
+            target["boxes"][:, 0::2] += pad_left
+            target["boxes"][:, 1::2] += pad_top
+            if "masks" in target:
+                target["masks"] = F.pad(target["masks"], padding, 0, "constant")
+
+        return img, target
+
+    def _crop(self, img, target, top, left, height, width):
+        img = F.crop(img, top, left, height, width)
+        if target is not None:
+            boxes = target["boxes"]
+            boxes[:, 0::2] -= left
+            boxes[:, 1::2] -= top
+            boxes[:, 0::2].clamp_(min=0, max=width)
+            boxes[:, 1::2].clamp_(min=0, max=height)
+
+            is_valid = (boxes[:, 0] < boxes[:, 2]) & (boxes[:, 1] < boxes[:, 3])
+
+            target["boxes"] = boxes[is_valid]
+            target["labels"] = target["labels"][is_valid]
+            if "masks" in target:
+                target["masks"] = F.crop(target["masks"][is_valid], top, left, height, width)
+
+        return img, target
+
+    def forward(self, img, target=None):
+        _, height, width = F.get_dimensions(img)
+        new_height = min(height, self.crop_height)
+        new_width = min(width, self.crop_width)
+
+        if new_height != height or new_width != width:
+            offset_height = max(height - self.crop_height, 0)
+            offset_width = max(width - self.crop_width, 0)
+
+            r = torch.rand(1)
+            top = int(offset_height * r)
+            left = int(offset_width * r)
+
+            img, target = self._crop(img, target, top, left, new_height, new_width)
+
+        pad_bottom = max(self.crop_height - new_height, 0)
+        pad_right = max(self.crop_width - new_width, 0)
+        if pad_bottom != 0 or pad_right != 0:
+            img, target = self._pad(img, target, [0, 0, pad_right, pad_bottom])
+
+        return img, target
+
+
+class RandomShortestSize(nn.Module):
+    def __init__(
+        self,
+        min_size: Union[List[int], Tuple[int], int],
+        max_size: int,
+        interpolation: InterpolationMode = InterpolationMode.BILINEAR,
+    ):
+        super().__init__()
+        self.min_size = [min_size] if isinstance(min_size, int) else list(min_size)
+        self.max_size = max_size
+        self.interpolation = interpolation
+
+    def forward(
+        self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
+    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
+        _, orig_height, orig_width = F.get_dimensions(image)
+
+        min_size = self.min_size[torch.randint(len(self.min_size), (1,)).item()]
+        r = min(min_size / min(orig_height, orig_width), self.max_size / max(orig_height, orig_width))
+
+        new_width = int(orig_width * r)
+        new_height = int(orig_height * r)
+
+        image = F.resize(image, [new_height, new_width], interpolation=self.interpolation)
+
+        if target is not None:
+            target["boxes"][:, 0::2] *= new_width / orig_width
+            target["boxes"][:, 1::2] *= new_height / orig_height
+            if "masks" in target:
+                target["masks"] = F.resize(
+                    target["masks"], [new_height, new_width], interpolation=InterpolationMode.NEAREST
+                )
+
+        return image, target
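
A hedged usage sketch of the two new transforms. The image, target values, and size choices below are invented for illustration; boxes are assumed XYXY floats, as the box arithmetic above implies:

import torch

# Hypothetical pipeline built from the two classes above; in the reference
# scripts they live in references/detection/transforms.py.
resize = RandomShortestSize(min_size=[480, 512], max_size=800)
crop = FixedSizeCrop(size=(512, 512), fill=0)

image = torch.randint(0, 256, (3, 600, 900), dtype=torch.uint8)
target = {
    "boxes": torch.tensor([[10.0, 20.0, 200.0, 300.0]]),  # XYXY, float
    "labels": torch.tensor([1]),
}

# Resize so the shorter edge is near min_size (capped by max_size), then
# crop/pad to a fixed 512x512 canvas; boxes are shifted, clamped, and
# degenerate ones filtered out along the way.
image, target = resize(image, target)
image, target = crop(image, target)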

test/builtin_dataset_mocks.py

Lines changed: 7 additions & 7 deletions

@@ -19,8 +19,8 @@
 from datasets_utils import make_zip, make_tar, create_image_folder, create_image_file
 from torch.nn.functional import one_hot
 from torch.testing import make_tensor as _make_tensor
+from torchvision._utils import sequence_to_str
 from torchvision.prototype.datasets._api import find
-from torchvision.prototype.utils._internal import sequence_to_str

 make_tensor = functools.partial(_make_tensor, device="cpu")
 make_scalar = functools.partial(make_tensor, ())
@@ -1329,20 +1329,20 @@ def cub200(info, root, config):

 @register_mock
 def eurosat(info, root, config):
-    data_folder = pathlib.Path(root, "eurosat", "2750")
+    data_folder = root / "2750"
     data_folder.mkdir(parents=True)

     num_examples_per_class = 3
-    classes = ("AnnualCrop", "Forest")
-    for cls in classes:
+    categories = ["AnnualCrop", "Forest"]
+    for category in categories:
         create_image_folder(
             root=data_folder,
-            name=cls,
-            file_name_fn=lambda idx: f"{cls}_{idx}.jpg",
+            name=category,
+            file_name_fn=lambda idx: f"{category}_{idx + 1}.jpg",
             num_examples=num_examples_per_class,
         )
     make_zip(root, "EuroSAT.zip", data_folder)
-    return len(classes) * num_examples_per_class
+    return len(categories) * num_examples_per_class


 @register_mock
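
For reference, the on-disk layout the reworked mock creates, reconstructed from the code above (idx runs from 0 to num_examples - 1, so the + 1 makes file names 1-based, presumably to match the real EuroSAT archive):

from pathlib import Path

# Two category folders under root/2750, three 1-indexed JPEGs each,
# then zipped into root/EuroSAT.zip.
root = Path("root")
expected_files = [
    root / "2750" / category / f"{category}_{idx + 1}.jpg"
    for category in ["AnnualCrop", "Forest"]
    for idx in range(3)
]
assert len(expected_files) == 6  # the sample count the mock returns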

test/common_utils.py

Lines changed: 2 additions & 2 deletions

@@ -137,7 +137,7 @@ def _create_data_batch(height=3, width=3, channels=3, num_samples=4, device="cpu
     return batch_tensor


-assert_equal = functools.partial(torch.testing.assert_close, rtol=0, atol=1e-6)
+assert_equal = functools.partial(torch.testing.assert_close, rtol=0, atol=0)


def get_list_of_videos(tmpdir, num_videos=5, sizes=None, fps=None):
@@ -195,7 +195,7 @@ def _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=1e-8, **fn_kwargs):
     for i in range(len(batch_tensors)):
         img_tensor = batch_tensors[i, ...]
         transformed_img = fn(img_tensor, **fn_kwargs)
-        assert_equal(transformed_img, transformed_batch[i, ...])
+        torch.testing.assert_close(transformed_img, transformed_batch[i, ...], rtol=0, atol=1e-6)

     if scripted_fn_atol >= 0:
         scripted_fn = torch.jit.script(fn)
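
A quick sketch of the tolerance semantics behind this change: assert_equal is now exact, and the one batch comparison that genuinely needs slack requests atol=1e-6 explicitly. Sample values below are illustrative:

import torch

a = torch.tensor([1.0])
b = torch.tensor([1.0 + 1e-7])  # differs by about 1.19e-7 in float32

torch.testing.assert_close(a, b, rtol=0, atol=1e-6)  # passes: within 1e-6
# torch.testing.assert_close(a, b, rtol=0, atol=0)   # would fail: exact match required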

test/test_prototype_utils.py renamed to test/test_internal_utils.py

Lines changed: 1 addition & 1 deletion

@@ -1,5 +1,5 @@
 import pytest
-from torchvision.prototype.utils._internal import sequence_to_str
+from torchvision._utils import sequence_to_str


 @pytest.mark.parametrize(

test/test_prototype_builtin_datasets.py

Lines changed: 3 additions & 1 deletion

@@ -10,8 +10,8 @@
 from torch.utils.data.datapipes.iter.grouping import ShardingFilterIterDataPipe as ShardingFilter
 from torch.utils.data.graph import traverse
 from torchdata.datapipes.iter import IterDataPipe, Shuffler
+from torchvision._utils import sequence_to_str
 from torchvision.prototype import transforms, datasets
-from torchvision.prototype.utils._internal import sequence_to_str


 assert_samples_equal = functools.partial(
@@ -53,6 +53,8 @@ def test_sample(self, test_home, dataset_mock, config):

        try:
            sample = next(iter(dataset))
+       except StopIteration:
+           raise AssertionError("Unable to draw any sample.") from None
        except Exception as error:
            raise AssertionError("Drawing a sample raised the error above.") from error
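
The new except StopIteration branch matters because StopIteration is a subclass of Exception, so without it an empty datapipe would fall into the generic handler with a confusing chained traceback. A minimal standalone illustration (the helper name is hypothetical):

def first_sample(iterable):
    try:
        return next(iter(iterable))
    except StopIteration:
        # An empty pipeline is its own failure mode, not "an error above".
        raise AssertionError("Unable to draw any sample.") from None
    except Exception as error:
        raise AssertionError("Drawing a sample raised the error above.") from error

assert first_sample([42]) == 42
# first_sample([])  # AssertionError: Unable to draw any sample.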

test/test_prototype_transforms.py

Lines changed: 56 additions & 2 deletions

@@ -2,9 +2,10 @@

 import pytest
 import torch
+from common_utils import assert_equal
 from test_prototype_transforms_functional import make_images, make_bounding_boxes, make_one_hot_labels
 from torchvision.prototype import transforms, features
-from torchvision.transforms.functional import to_pil_image
+from torchvision.transforms.functional import to_pil_image, pil_to_tensor


 def make_vanilla_tensor_images(*args, **kwargs):
@@ -66,10 +67,10 @@ def parametrize_from_transforms(*transforms):
 class TestSmoke:
     @parametrize_from_transforms(
         transforms.RandomErasing(p=1.0),
-        transforms.HorizontalFlip(),
         transforms.Resize([16, 16]),
         transforms.CenterCrop([16, 16]),
         transforms.ConvertImageDtype(),
+        transforms.RandomHorizontalFlip(),
     )
     def test_common(self, transform, input):
         transform(input)
@@ -188,3 +189,56 @@ def test_random_resized_crop(self, transform, input):
     )
     def test_convert_image_color_space(self, transform, input):
         transform(input)
+
+
+@pytest.mark.parametrize("p", [0.0, 1.0])
+class TestRandomHorizontalFlip:
+    def input_expected_image_tensor(self, p, dtype=torch.float32):
+        input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype)
+        expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype)
+
+        return input, expected if p == 1 else input
+
+    def test_simple_tensor(self, p):
+        input, expected = self.input_expected_image_tensor(p)
+        transform = transforms.RandomHorizontalFlip(p=p)
+
+        actual = transform(input)
+
+        assert_equal(expected, actual)
+
+    def test_pil_image(self, p):
+        input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
+        transform = transforms.RandomHorizontalFlip(p=p)
+
+        actual = transform(to_pil_image(input))
+
+        assert_equal(expected, pil_to_tensor(actual))
+
+    def test_features_image(self, p):
+        input, expected = self.input_expected_image_tensor(p)
+        transform = transforms.RandomHorizontalFlip(p=p)
+
+        actual = transform(features.Image(input))
+
+        assert_equal(features.Image(expected), actual)
+
+    def test_features_segmentation_mask(self, p):
+        input, expected = self.input_expected_image_tensor(p)
+        transform = transforms.RandomHorizontalFlip(p=p)
+
+        actual = transform(features.SegmentationMask(input))
+
+        assert_equal(features.SegmentationMask(expected), actual)
+
+    def test_features_bounding_box(self, p):
+        input = features.BoundingBox([0, 0, 5, 5], format=features.BoundingBoxFormat.XYXY, image_size=(10, 10))
+        transform = transforms.RandomHorizontalFlip(p=p)
+
+        actual = transform(input)
+
+        expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input
+        expected = features.BoundingBox.new_like(input, data=expected_image_tensor)
+        assert_equal(expected, actual)
+        assert actual.format == expected.format
+        assert actual.image_size == expected.image_size
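
The expected box in test_features_bounding_box follows from the XYXY flip arithmetic: on an image of width W, a horizontal flip maps x1 to W - x2 and x2 to W - x1, leaving the y coordinates untouched:

import torch

W = 10                       # image width from image_size=(10, 10)
x1, y1, x2, y2 = 0, 0, 5, 5  # the input box
flipped = torch.tensor([W - x2, y1, W - x1, y2])
assert flipped.tolist() == [5, 0, 10, 5]  # the expected tensor in the test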

test/test_transforms.py

Lines changed: 3 additions & 3 deletions

@@ -452,12 +452,12 @@ def test_resize_size_equals_small_edge_size(height, width):


 class TestPad:
-    def test_pad(self):
+    @pytest.mark.parametrize("fill", [85, 85.0])
+    def test_pad(self, fill):
         height = random.randint(10, 32) * 2
         width = random.randint(10, 32) * 2
         img = torch.ones(3, height, width, dtype=torch.uint8)
         padding = random.randint(1, 20)
-        fill = random.randint(1, 50)
         result = transforms.Compose(
             [
                 transforms.ToPILImage(),
@@ -484,7 +484,7 @@ def test_pad_with_tuple_of_pad_values(self):
         output = transforms.Pad(padding)(img)
         assert output.size == (width + padding[0] * 2, height + padding[1] * 2)

-        padding = tuple(random.randint(1, 20) for _ in range(4))
+        padding = [random.randint(1, 20) for _ in range(4)]
         output = transforms.Pad(padding)(img)
         assert output.size[0] == width + padding[0] + padding[2]
         assert output.size[1] == height + padding[1] + padding[3]
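
The parametrization now checks that an int fill and the equivalent float fill behave identically. A hedged standalone version of the same check, assuming the float-fill support on PIL inputs that this test exercises:

import torch
import torchvision.transforms as transforms

img = transforms.ToPILImage()(torch.ones(3, 16, 16, dtype=torch.uint8))
out_int = transforms.Pad(2, fill=85)(img)
out_float = transforms.Pad(2, fill=85.0)(img)
# A padded corner pixel should hold the fill value in both cases.
assert out_int.getpixel((0, 0)) == out_float.getpixel((0, 0)) == (85, 85, 85)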

torchvision/_utils.py

Lines changed: 13 additions & 1 deletion

@@ -1,5 +1,5 @@
 import enum
-from typing import TypeVar, Type
+from typing import Sequence, TypeVar, Type

 T = TypeVar("T", bound=enum.Enum)

@@ -18,3 +18,15 @@ def from_str(self: Type[T], member: str) -> T:  # type: ignore[misc]

 class StrEnum(enum.Enum, metaclass=StrEnumMeta):
     pass
+
+
+def sequence_to_str(seq: Sequence, separate_last: str = "") -> str:
+    if not seq:
+        return ""
+    if len(seq) == 1:
+        return f"'{seq[0]}'"
+
+    head = "'" + "', '".join([str(item) for item in seq[:-1]]) + "'"
+    tail = f"{'' if separate_last and len(seq) == 2 else ','} {separate_last}'{seq[-1]}'"
+
+    return head + tail
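
The behavior of the relocated helper, traced directly from the implementation above:

from torchvision._utils import sequence_to_str

sequence_to_str([])                                           # ''
sequence_to_str(["foo"])                                      # "'foo'"
sequence_to_str(["foo", "bar"])                               # "'foo', 'bar'"
sequence_to_str(["foo", "bar"], separate_last="or ")          # "'foo' or 'bar'"
sequence_to_str(["foo", "bar", "baz"], separate_last="and ")  # "'foo', 'bar', and 'baz'"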

torchvision/prototype/datasets/utils/_dataset.py

Lines changed: 2 additions & 1 deletion

@@ -7,7 +7,8 @@
 from typing import Any, Dict, List, Optional, Sequence, Union, Tuple, Collection

 from torch.utils.data import IterDataPipe
-from torchvision.prototype.utils._internal import FrozenBunch, make_repr, add_suggestion, sequence_to_str
+from torchvision._utils import sequence_to_str
+from torchvision.prototype.utils._internal import FrozenBunch, make_repr, add_suggestion

 from .._home import use_sharded_dataset
 from ._internal import BUILTIN_DIR, _make_sharded_datapipe

torchvision/prototype/features/_bounding_box.py

Lines changed: 1 addition & 1 deletion

@@ -64,7 +64,7 @@ def to_format(self, format: Union[str, BoundingBoxFormat]) -> BoundingBox:
         from torchvision.prototype.transforms.functional import convert_bounding_box_format

         if isinstance(format, str):
-            format = BoundingBoxFormat[format]
+            format = BoundingBoxFormat.from_str(format.upper())

         return BoundingBox.new_like(
             self, convert_bounding_box_format(self, old_format=self.format, new_format=format), format=format
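
The motivation, sketched: plain enum indexing is case-sensitive, so lowercase format strings used to raise KeyError, while the new path normalizes the case first. A small illustration, assuming the prototype BoundingBoxFormat enum with the XYXY member and the from_str classmethod shown in torchvision/_utils.py above:

from torchvision.prototype.features import BoundingBoxFormat

fmt = BoundingBoxFormat["XYXY"]                   # exact-case indexing works
fmt = BoundingBoxFormat.from_str("xyxy".upper())  # the new, case-tolerant path
# BoundingBoxFormat["xyxy"]                       # KeyError: enum indexing is case-sensitive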

torchvision/prototype/transforms/__init__.py

Lines changed: 10 additions & 1 deletion

@@ -7,7 +7,16 @@
 from ._augment import RandomErasing, RandomMixup, RandomCutmix
 from ._auto_augment import RandAugment, TrivialAugmentWide, AutoAugment, AugMix
 from ._container import Compose, RandomApply, RandomChoice, RandomOrder
-from ._geometry import HorizontalFlip, Resize, CenterCrop, RandomResizedCrop, FiveCrop, TenCrop, BatchMultiCrop
+from ._geometry import (
+    Resize,
+    CenterCrop,
+    RandomResizedCrop,
+    FiveCrop,
+    TenCrop,
+    BatchMultiCrop,
+    RandomHorizontalFlip,
+    RandomZoomOut,
+)
 from ._meta import ConvertBoundingBoxFormat, ConvertImageDtype, ConvertImageColorSpace
 from ._misc import Identity, Normalize, ToDtype, Lambda
 from ._presets import (
