
Commit 3db55c1

Merge branch 'main' into szhi-s3d

2 parents: 15cf85d + 330b6c9

12 files changed: +380 / -61 lines

test/test_prototype_transforms.py

Lines changed: 126 additions & 7 deletions

@@ -6,7 +6,7 @@

 import pytest
 import torch
-from common_utils import assert_equal
+from common_utils import assert_equal, cpu_and_gpu
 from test_prototype_transforms_functional import (
     make_bounding_box,
     make_bounding_boxes,
@@ -15,6 +15,7 @@
     make_one_hot_labels,
     make_segmentation_mask,
 )
+from torchvision.ops.boxes import box_iou
 from torchvision.prototype import features, transforms
 from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image

@@ -793,7 +794,7 @@ def test__transform(self, p, transform_cls, func_op_name, kwargs, mocker):
         if p > 0.0:
             fn.assert_called_once_with(inpt, **kwargs)
         else:
-            fn.call_count == 0
+            assert fn.call_count == 0


 class TestRandomPerspective:
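
A note on the recurring fix in this file: the old line built a comparison and discarded the result, so the else-branch asserted nothing and could never fail. A minimal standalone illustration (hypothetical mock, not part of this diff):

from unittest import mock

fn = mock.MagicMock()
fn()  # the mock has actually been called once

fn.call_count == 0          # evaluates to False and is silently discarded
assert fn.call_count == 0   # fails loudly, which is what the test intends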
@@ -1014,7 +1015,7 @@ def test__transform(self, p, inpt_type, mocker):
         if p > 0.0:
             fn.assert_called_once_with(erase_image_tensor_inpt, **params)
         else:
-            fn.call_count == 0
+            assert fn.call_count == 0


 class TestTransform:
@@ -1050,7 +1051,7 @@ def test__transform(self, inpt_type, mocker):
         transform = transforms.ToImageTensor()
         transform(inpt)
         if inpt_type in (features.BoundingBox, str, int):
-            fn.call_count == 0
+            assert fn.call_count == 0
         else:
             fn.assert_called_once_with(inpt, copy=transform.copy)

@@ -1067,7 +1068,7 @@ def test__transform(self, inpt_type, mocker):
         transform = transforms.ToImagePIL()
         transform(inpt)
         if inpt_type in (features.BoundingBox, str, int):
-            fn.call_count == 0
+            assert fn.call_count == 0
         else:
             fn.assert_called_once_with(inpt, copy=transform.copy)

@@ -1085,7 +1086,7 @@ def test__transform(self, inpt_type, mocker):
         transform = transforms.ToPILImage()
         transform(inpt)
         if inpt_type in (PIL.Image.Image, features.BoundingBox, str, int):
-            fn.call_count == 0
+            assert fn.call_count == 0
         else:
             fn.assert_called_once_with(inpt, mode=transform.mode)

@@ -1103,7 +1104,7 @@ def test__transform(self, inpt_type, mocker):
        transform = transforms.ToTensor()
         transform(inpt)
         if inpt_type in (features.Image, torch.Tensor, features.BoundingBox, str, int):
-            fn.call_count == 0
+            assert fn.call_count == 0
         else:
             fn.assert_called_once_with(inpt)

@@ -1127,6 +1128,124 @@ def test_ctor(self, trfms):
         assert isinstance(output, torch.Tensor)


+class TestRandomIoUCrop:
+    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
+    def test__get_params(self, device, options, mocker):
+        image = mocker.MagicMock(spec=features.Image)
+        image.num_channels = 3
+        image.image_size = (24, 32)
+        bboxes = features.BoundingBox(
+            torch.tensor([[1, 1, 10, 10], [20, 20, 23, 23], [1, 20, 10, 23], [20, 1, 23, 10]]),
+            format="XYXY",
+            image_size=image.image_size,
+            device=device,
+        )
+        sample = [image, bboxes]
+
+        transform = transforms.RandomIoUCrop(sampler_options=options)
+
+        n_samples = 5
+        for _ in range(n_samples):
+
+            params = transform._get_params(sample)
+
+            if options == [2.0]:
+                assert len(params) == 0
+                return
+
+            assert len(params["is_within_crop_area"]) > 0
+            assert params["is_within_crop_area"].dtype == torch.bool
+
+            orig_h = image.image_size[0]
+            orig_w = image.image_size[1]
+            assert int(transform.min_scale * orig_h) <= params["height"] <= int(transform.max_scale * orig_h)
+            assert int(transform.min_scale * orig_w) <= params["width"] <= int(transform.max_scale * orig_w)
+
+            left, top = params["left"], params["top"]
+            new_h, new_w = params["height"], params["width"]
+            ious = box_iou(
+                bboxes,
+                torch.tensor([[left, top, left + new_w, top + new_h]], dtype=bboxes.dtype, device=bboxes.device),
+            )
+            assert ious.max() >= options[0] or ious.max() >= options[1], f"{ious} vs {options}"

+    def test__transform_empty_params(self, mocker):
+        transform = transforms.RandomIoUCrop(sampler_options=[2.0])
+        image = features.Image(torch.rand(1, 3, 4, 4))
+        bboxes = features.BoundingBox(torch.tensor([[1, 1, 2, 2]]), format="XYXY", image_size=(4, 4))
+        label = features.Label(torch.tensor([1]))
+        sample = [image, bboxes, label]
+        # Let's mock transform._get_params to control the output:
+        transform._get_params = mocker.MagicMock(return_value={})
+        output = transform(sample)
+        torch.testing.assert_close(output, sample)
+
+    def test_forward_assertion(self):
+        transform = transforms.RandomIoUCrop()
+        with pytest.raises(
+            TypeError,
+            match="requires input sample to contain Images or PIL Images, BoundingBoxes and Labels or OneHotLabels",
+        ):
+            transform(torch.tensor(0))
+
+    def test__transform(self, mocker):
+        transform = transforms.RandomIoUCrop()
+
+        image = features.Image(torch.rand(3, 32, 24))
+        bboxes = make_bounding_box(format="XYXY", image_size=(32, 24), extra_dims=(6,))
+        label = features.Label(torch.randint(0, 10, size=(6,)))
+        ohe_label = features.OneHotLabel(torch.zeros(6, 10).scatter_(1, label.unsqueeze(1), 1))
+        masks = make_segmentation_mask((32, 24))
+        ohe_masks = features.SegmentationMask(torch.randint(0, 2, size=(6, 32, 24)))
+        sample = [image, bboxes, label, ohe_label, masks, ohe_masks]
+
+        fn = mocker.patch("torchvision.prototype.transforms.functional.crop", side_effect=lambda x, **params: x)
+        is_within_crop_area = torch.tensor([0, 1, 0, 1, 0, 1], dtype=torch.bool)
+
+        params = dict(top=1, left=2, height=12, width=12, is_within_crop_area=is_within_crop_area)
+        transform._get_params = mocker.MagicMock(return_value=params)
+        output = transform(sample)
+
+        assert fn.call_count == 4
+
+        expected_calls = [
+            mocker.call(image, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
+            mocker.call(bboxes, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
+            mocker.call(masks, top=params["top"], left=params["left"], height=params["height"], width=params["width"]),
+            mocker.call(
+                ohe_masks, top=params["top"], left=params["left"], height=params["height"], width=params["width"]
+            ),
+        ]
+
+        fn.assert_has_calls(expected_calls)
+
+        expected_within_targets = sum(is_within_crop_area)
+
+        # check number of bboxes vs number of labels:
+        output_bboxes = output[1]
+        assert isinstance(output_bboxes, features.BoundingBox)
+        assert len(output_bboxes) == expected_within_targets
+
+        # check labels
+        output_label = output[2]
+        assert isinstance(output_label, features.Label)
+        assert len(output_label) == expected_within_targets
+        torch.testing.assert_close(output_label, label[is_within_crop_area])
+
+        output_ohe_label = output[3]
+        assert isinstance(output_ohe_label, features.OneHotLabel)
+        torch.testing.assert_close(output_ohe_label, ohe_label[is_within_crop_area])
+
+        output_masks = output[4]
+        assert isinstance(output_masks, features.SegmentationMask)
+        assert output_masks.shape[:-2] == masks.shape[:-2]
+
+        output_ohe_masks = output[5]
+        assert isinstance(output_ohe_masks, features.SegmentationMask)
+        assert len(output_ohe_masks) == expected_within_targets
+
+
 class TestScaleJitter:
     def test__get_params(self, mocker):
         image_size = (24, 32)
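
For context, a minimal sketch of how RandomIoUCrop is driven in these tests; the shapes and sampler options here are illustrative, and the API is the prototype one exercised above, not a stable interface:

import torch
from torchvision.prototype import features, transforms

image = features.Image(torch.rand(3, 32, 24))
bboxes = features.BoundingBox(
    torch.tensor([[1, 1, 10, 10], [5, 5, 20, 20]]), format="XYXY", image_size=(32, 24)
)
labels = features.Label(torch.tensor([3, 7]))

# _get_params samples a crop whose IoU with the boxes clears one of the
# sampler_options thresholds; boxes flagged outside the crop area
# (is_within_crop_area) and their labels are dropped, as asserted above.
crop = transforms.RandomIoUCrop(sampler_options=[0.3, 0.5, 0.7])
new_image, new_bboxes, new_labels = crop([image, bboxes, labels])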
Lines changed: 83 additions & 0 deletions

@@ -0,0 +1,83 @@
+import PIL.Image
+import pytest
+
+import torch
+
+from test_prototype_transforms_functional import make_bounding_box, make_image, make_segmentation_mask
+
+from torchvision.prototype import features
+from torchvision.prototype.transforms._utils import has_all, has_any, is_simple_tensor
+from torchvision.prototype.transforms.functional import to_image_pil
+
+
+IMAGE = make_image(color_space=features.ColorSpace.RGB)
+BOUNDING_BOX = make_bounding_box(format=features.BoundingBoxFormat.XYXY, image_size=IMAGE.image_size)
+SEGMENTATION_MASK = make_segmentation_mask(size=IMAGE.image_size)
+
+
+@pytest.mark.parametrize(
+    ("sample", "types", "expected"),
+    [
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.Image,), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.BoundingBox,), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.SegmentationMask,), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.BoundingBox), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.SegmentationMask), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.BoundingBox, features.SegmentationMask), True),
+        ((SEGMENTATION_MASK,), (features.Image, features.BoundingBox), False),
+        ((BOUNDING_BOX,), (features.Image, features.SegmentationMask), False),
+        ((IMAGE,), (features.BoundingBox, features.SegmentationMask), False),
+        (
+            (IMAGE, BOUNDING_BOX, SEGMENTATION_MASK),
+            (features.Image, features.BoundingBox, features.SegmentationMask),
+            True,
+        ),
+        ((), (features.Image, features.BoundingBox, features.SegmentationMask), False),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (lambda obj: isinstance(obj, features.Image),), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (lambda _: False,), False),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (lambda _: True,), True),
+        ((IMAGE,), (features.Image, PIL.Image.Image, is_simple_tensor), True),
+        ((torch.Tensor(IMAGE),), (features.Image, PIL.Image.Image, is_simple_tensor), True),
+        ((to_image_pil(IMAGE),), (features.Image, PIL.Image.Image, is_simple_tensor), True),
+    ],
+)
+def test_has_any(sample, types, expected):
+    assert has_any(sample, *types) is expected
+
+
+@pytest.mark.parametrize(
+    ("sample", "types", "expected"),
+    [
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.Image,), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.BoundingBox,), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.SegmentationMask,), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.BoundingBox), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.SegmentationMask), True),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (features.BoundingBox, features.SegmentationMask), True),
+        (
+            (IMAGE, BOUNDING_BOX, SEGMENTATION_MASK),
+            (features.Image, features.BoundingBox, features.SegmentationMask),
+            True,
+        ),
+        ((BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.BoundingBox), False),
+        ((BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.SegmentationMask), False),
+        ((IMAGE, SEGMENTATION_MASK), (features.BoundingBox, features.SegmentationMask), False),
+        (
+            (IMAGE, BOUNDING_BOX, SEGMENTATION_MASK),
+            (features.Image, features.BoundingBox, features.SegmentationMask),
+            True,
+        ),
+        ((BOUNDING_BOX, SEGMENTATION_MASK), (features.Image, features.BoundingBox, features.SegmentationMask), False),
+        ((IMAGE, SEGMENTATION_MASK), (features.Image, features.BoundingBox, features.SegmentationMask), False),
+        ((IMAGE, BOUNDING_BOX), (features.Image, features.BoundingBox, features.SegmentationMask), False),
+        (
+            (IMAGE, BOUNDING_BOX, SEGMENTATION_MASK),
+            (lambda obj: isinstance(obj, (features.Image, features.BoundingBox, features.SegmentationMask)),),
+            True,
+        ),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (lambda _: False,), False),
+        ((IMAGE, BOUNDING_BOX, SEGMENTATION_MASK), (lambda _: True,), True),
+    ],
+)
+def test_has_all(sample, types, expected):
+    assert has_all(sample, *types) is expected
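
These parametrizations pin down the contract: a bare type is matched with isinstance, a callable is used as a predicate, has_any needs one match anywhere in the sample, and has_all needs every entry matched by something. A plausible sketch of that contract for the flat-sequence case only (the real helpers in torchvision.prototype.transforms._utils also walk nested samples):

from typing import Any, Callable, Sequence, Type, Union

TypeOrCheck = Union[Type, Callable[[Any], bool]]

def _matches(obj: Any, type_or_check: TypeOrCheck) -> bool:
    # Bare types mean isinstance; callables act as predicates.
    if isinstance(type_or_check, type):
        return isinstance(obj, type_or_check)
    return type_or_check(obj)

def has_any(sample: Sequence[Any], *types_or_checks: TypeOrCheck) -> bool:
    # At least one item matches at least one type/predicate.
    return any(_matches(obj, t) for t in types_or_checks for obj in sample)

def has_all(sample: Sequence[Any], *types_or_checks: TypeOrCheck) -> bool:
    # Every type/predicate is matched by some item.
    return all(any(_matches(obj, t) for obj in sample) for t in types_or_checks)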

torchvision/datasets/food101.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@
 class Food101(VisionDataset):
     """`The Food-101 Data Set <https://data.vision.ee.ethz.ch/cvl/datasets_extra/food-101/>`_.

-    The Food-101 is a challenging data set of 101 food categories, with 101'000 images.
+    The Food-101 is a challenging data set of 101 food categories with 101,000 images.
     For each class, 250 manually reviewed test images are provided as well as 750 training images.
     On purpose, the training images were not cleaned, and thus still contain some amount of noise.
     This comes mostly in the form of intense colors and sometimes wrong labels. All images were

torchvision/datasets/ucf101.py

Lines changed: 1 addition & 1 deletion

@@ -17,7 +17,7 @@ class UCF101(VisionDataset):
     by ``frames_per_clip``, where the step in frames between each clip is given by
     ``step_between_clips``. The dataset itself can be downloaded from the dataset website;
     annotations that ``annotation_path`` should be pointing to can be downloaded from `here
-    <https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip>`.
+    <https://www.crcv.ucf.edu/data/UCF101/UCF101TrainTestSplits-RecognitionTask.zip>`_.

     To give an example, for 2 videos with 10 and 15 frames respectively, if ``frames_per_clip=5``
     and ``step_between_clips=5``, the dataset size will be (2 + 3) = 5, where the first two
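
The trailing underscore matters in reStructuredText: ``\`here <url>\`_`` is a named hyperlink reference, while the same construct without the underscore is treated as plain interpreted text, so before this fix the rendered docs showed a broken link.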

torchvision/prototype/transforms/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -24,6 +24,7 @@
     RandomAffine,
     RandomCrop,
     RandomHorizontalFlip,
+    RandomIoUCrop,
     RandomPerspective,
     RandomResizedCrop,
     RandomRotation,
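
Once exported here, the transform is reachable from the prototype namespace root; a quick smoke check (assuming a torchvision build with the prototype area enabled):

from torchvision.prototype import transforms

crop = transforms.RandomIoUCrop()  # now resolvable from the package root
print(type(crop).__name__)  # RandomIoUCrop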

torchvision/prototype/transforms/_augment.py

Lines changed: 8 additions & 6 deletions

@@ -6,10 +6,10 @@
 import PIL.Image
 import torch
 from torchvision.prototype import features
-from torchvision.prototype.transforms import functional as F, Transform
+from torchvision.prototype.transforms import functional as F

 from ._transform import _RandomApplyTransform
-from ._utils import get_image_dimensions, has_all, has_any, is_simple_tensor, query_image
+from ._utils import get_image_dimensions, has_any, is_simple_tensor, query_image


 class RandomErasing(_RandomApplyTransform):
@@ -97,15 +97,17 @@ def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
         return inpt


-class _BaseMixupCutmix(Transform):
-    def __init__(self, *, alpha: float) -> None:
-        super().__init__()
+class _BaseMixupCutmix(_RandomApplyTransform):
+    def __init__(self, *, alpha: float, p: float = 0.5) -> None:
+        super().__init__(p=p)
         self.alpha = alpha
         self._dist = torch.distributions.Beta(torch.tensor([alpha]), torch.tensor([alpha]))

     def forward(self, *inpts: Any) -> Any:
         sample = inpts if len(inpts) > 1 else inpts[0]
-        if not has_all(sample, features.Image, features.OneHotLabel):
+        if not (
+            has_any(sample, features.Image, PIL.Image.Image, is_simple_tensor) and has_any(sample, features.OneHotLabel)
+        ):
             raise TypeError(f"{type(self).__name__}() is only defined for Image's *and* OneHotLabel's.")
         if has_any(sample, features.BoundingBox, features.SegmentationMask, features.Label):
             raise TypeError(
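
Two behavioral consequences of this hunk, as far as the diff shows: _BaseMixupCutmix now inherits the p-gated apply logic from _RandomApplyTransform, and the input check accepts any image-like entry (features.Image, PIL image, or plain tensor) with a OneHotLabel, rather than requiring a features.Image. A sketch of what the relaxed check admits, with a hypothetical sample:

import PIL.Image
import torch
from torchvision.prototype import features
from torchvision.prototype.transforms._utils import has_any, is_simple_tensor

plain_image = torch.rand(3, 8, 8)                    # a simple tensor, not a features.Image
ohe_label = features.OneHotLabel(torch.eye(10)[:1])  # one-hot label feature
sample = [plain_image, ohe_label]

# Old check: has_all(sample, features.Image, features.OneHotLabel) -> False.
# New check: a plain tensor counts as image-like, so the sample is accepted.
ok = has_any(sample, features.Image, PIL.Image.Image, is_simple_tensor) and has_any(sample, features.OneHotLabel)
print(ok)  # True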
