
Commit 72ac231

Updates, patches and test updates
1 parent 427c4c1 commit 72ac231

4 files changed: +37 additions, −15 deletions

test/common_utils.py

Lines changed: 10 additions & 5 deletions
@@ -497,18 +497,21 @@ def make_image_loader(
     extra_dims=(),
     dtype=torch.float32,
     constant_alpha=True,
+    memory_format=torch.contiguous_format,
 ):
     size = _parse_spatial_size(size)
     num_channels = get_num_channels(color_space)

-    def fn(shape, dtype, device):
+    def fn(shape, dtype, device, memory_format):
         max_value = get_max_value(dtype)
-        data = torch.testing.make_tensor(shape, low=0, high=max_value, dtype=dtype, device=device)
+        data = torch.testing.make_tensor(
+            shape, low=0, high=max_value, dtype=dtype, device=device, memory_format=memory_format
+        )
         if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha:
             data[..., -1, :, :] = max_value
         return datapoints.Image(data)

-    return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype)
+    return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format)


 make_image = from_loader(make_image_loader)
@@ -757,8 +760,10 @@ def make_video_loader(
     size = _parse_spatial_size(size)
     num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames

-    def fn(shape, dtype, device):
-        video = make_image(size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device)
+    def fn(shape, dtype, device, memory_format):
+        video = make_image(
+            size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device, memory_format=memory_format
+        )
         return datapoints.Video(video)

     return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype)
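The new `memory_format` argument is ultimately forwarded to `torch.testing.make_tensor`, which the loader above calls. A minimal standalone sketch of what requesting `torch.channels_last` produces, independent of the `ImageLoader` machinery (the shape below is arbitrary):

    import torch

    # 4D (N, C, H, W) shape; channels_last only applies to 4D tensors.
    data = torch.testing.make_tensor(
        (2, 3, 16, 16), low=0, high=255, dtype=torch.uint8, device="cpu",
        memory_format=torch.channels_last,
    )
    print(data.is_contiguous(memory_format=torch.channels_last))  # True
    print(data.is_contiguous())                                   # False: strides follow NHWC order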

test/test_transforms_v2_consistency.py

Lines changed: 10 additions & 3 deletions
@@ -98,6 +98,8 @@ def __init__(
             ArgsKwargs((29, 32), antialias=False),
             ArgsKwargs((28, 31), antialias=True),
         ],
+        # rtol=atol=1 because Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
+        closeness_kwargs=dict(rtol=1, atol=1),
     ),
     ConsistencyConfig(
         v2_transforms.CenterCrop,
@@ -313,6 +315,8 @@ def __init__(
             ArgsKwargs((29, 32), antialias=False),
             ArgsKwargs((28, 31), antialias=True),
         ],
+        # rtol=atol=1 because Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
+        closeness_kwargs=dict(rtol=1, atol=1),
     ),
     ConsistencyConfig(
         v2_transforms.RandomErasing,
@@ -783,7 +787,8 @@ def test_compose(self):
             ]
         )

-        check_call_consistency(prototype_transform, legacy_transform)
+        # rtol=atol=1 because Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
+        check_call_consistency(prototype_transform, legacy_transform, closeness_kwargs=dict(rtol=1, atol=1))

     @pytest.mark.parametrize("p", [0, 0.1, 0.5, 0.9, 1])
     @pytest.mark.parametrize("sequence_type", [list, nn.ModuleList])
@@ -807,7 +812,8 @@ def test_random_apply(self, p, sequence_type):
             p=p,
         )

-        check_call_consistency(prototype_transform, legacy_transform)
+        # rtol=atol=1 because Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
+        check_call_consistency(prototype_transform, legacy_transform, closeness_kwargs=dict(rtol=1, atol=1))

         if sequence_type is nn.ModuleList:
             # quick and dirty test that it is jit-scriptable
@@ -832,7 +838,8 @@ def test_random_choice(self, probabilities):
             p=probabilities,
         )

-        check_call_consistency(prototype_transform, legacy_transform)
+        # rtol=atol=1 because Resize v2 uses the native uint8 interpolate path for bilinear and nearest modes
+        check_call_consistency(prototype_transform, legacy_transform, closeness_kwargs=dict(rtol=1, atol=1))


 class TestToTensorTransforms:
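As a rough illustration of what these relaxed tolerances buy (not part of the commit; plain `torch.testing.assert_close` standing in for the suite's consistency helper): for integer dtypes the default comparison is exact, so the off-by-one rounding differences between the v1 float path and the native uint8 path would fail without `rtol`/`atol` set to 1.

    import torch

    # Hypothetical outputs of the two Resize paths, differing by one intensity level.
    v1_out = torch.tensor([10, 200, 255], dtype=torch.uint8)
    v2_out = torch.tensor([11, 199, 255], dtype=torch.uint8)

    # Default tolerances for integer dtypes are rtol=0, atol=0, i.e. exact equality:
    # torch.testing.assert_close(v1_out, v2_out)  # would raise AssertionError

    # With rtol=1, atol=1 the allowed difference is atol + rtol * |expected|,
    # so the off-by-one results above are accepted.
    torch.testing.assert_close(v1_out, v2_out, rtol=1, atol=1)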

test/transforms_v2_kernel_infos.py

Lines changed: 11 additions & 7 deletions
@@ -1569,31 +1569,35 @@ def reference_inputs_equalize_image_tensor():
     # We are not using `make_image_loaders` here since that uniformly samples the values over the whole value range.
     # Since the whole point of this kernel is to transform an arbitrary distribution of values into a uniform one,
     # the information gain is low if we already provide something really close to the expected value.
-    def make_uniform_band_image(shape, dtype, device, *, low_factor, high_factor):
+    def make_uniform_band_image(shape, dtype, device, *, low_factor, high_factor, memory_format):
         if dtype.is_floating_point:
             low = low_factor
             high = high_factor
         else:
             max_value = torch.iinfo(dtype).max
             low = int(low_factor * max_value)
             high = int(high_factor * max_value)
-        return torch.testing.make_tensor(shape, dtype=dtype, device=device, low=low, high=high)
+        return torch.testing.make_tensor(shape, dtype=dtype, device=device, low=low, high=high).to(
+            memory_format=memory_format, copy=True
+        )

-    def make_beta_distributed_image(shape, dtype, device, *, alpha, beta):
+    def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_format):
         image = torch.distributions.Beta(alpha, beta).sample(shape)
         if not dtype.is_floating_point:
             image.mul_(torch.iinfo(dtype).max).round_()
-        return image.to(dtype=dtype, device=device)
+        return image.to(dtype=dtype, device=device, memory_format=memory_format, copy=True)

     spatial_size = (256, 256)
     for dtype, color_space, fn in itertools.product(
         [torch.uint8],
         ["GRAY", "RGB"],
         [
-            lambda shape, dtype, device: torch.zeros(shape, dtype=dtype, device=device),
-            lambda shape, dtype, device: torch.full(
-                shape, 1.0 if dtype.is_floating_point else torch.iinfo(dtype).max, dtype=dtype, device=device
+            lambda shape, dtype, device, memory_format: torch.zeros(shape, dtype=dtype, device=device).to(
+                memory_format=memory_format, copy=True
             ),
+            lambda shape, dtype, device, memory_format: torch.full(
+                shape, 1.0 if dtype.is_floating_point else torch.iinfo(dtype).max, dtype=dtype, device=device
+            ).to(memory_format=memory_format, copy=True),
             *[
                 functools.partial(make_uniform_band_image, low_factor=low_factor, high_factor=high_factor)
                 for low_factor, high_factor in [
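A note on the `.to(memory_format=..., copy=True)` pattern used above (generic PyTorch behavior, not specific to this commit): without `copy=True`, `Tensor.to` may return the input tensor itself when nothing needs to change, whereas `copy=True` always materializes a fresh tensor in the requested layout, so the reference inputs never alias their source data. A small sketch:

    import torch

    x = torch.zeros(2, 3, 8, 8)  # already in the default contiguous layout

    same = x.to(memory_format=torch.contiguous_format)              # no change needed: returns ``x`` itself
    fresh = x.to(memory_format=torch.contiguous_format, copy=True)  # always a new tensor

    print(same.data_ptr() == x.data_ptr())   # True: aliases the original storage
    print(fresh.data_ptr() == x.data_ptr())  # False: independent storage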

torchvision/transforms/v2/functional/_geometry.py

Lines changed: 6 additions & 0 deletions
@@ -195,6 +195,12 @@ def resize_image_tensor(
             if "AVX2" in torch.backends.cpu.get_cpu_capability():
                 acceptable_dtypes.append(torch.uint8)

+        # TODO: Remove when https://github.com/pytorch/pytorch/pull/101136 is landed
+        if dtype == torch.uint8 and not (
+            image.is_contiguous() or image.is_contiguous(memory_format=torch.channels_last)
+        ):
+            image = image.contiguous(memory_format=torch.channels_last)
+
         if image.is_contiguous(memory_format=torch.channels_last):
             strides = image.stride()
             numel = image.numel()
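A minimal standalone illustration of the guard added above (assuming a 4D uint8 image; `resize_image_tensor` itself is not imported here): a strided view is neither contiguous nor channels_last-contiguous, and the workaround normalizes such inputs to channels_last before the native uint8 interpolate path runs.

    import torch

    img = torch.randint(0, 256, (1, 3, 32, 32), dtype=torch.uint8)
    view = img[..., ::2, ::2]  # strided view: matches neither memory format

    print(view.is_contiguous())                                   # False
    print(view.is_contiguous(memory_format=torch.channels_last))  # False

    # Same check as the TODO block above: normalize ambiguous layouts to channels_last.
    if view.dtype == torch.uint8 and not (
        view.is_contiguous() or view.is_contiguous(memory_format=torch.channels_last)
    ):
        view = view.contiguous(memory_format=torch.channels_last)

    print(view.is_contiguous(memory_format=torch.channels_last))  # True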
