From 409df6856f589d5c2b3d52378fadbd9b920f94a7 Mon Sep 17 00:00:00 2001 From: mantasu Date: Wed, 7 Feb 2024 10:50:38 +0000 Subject: [PATCH 1/7] Clarify default TVTensor shapes --- torchvision/tv_tensors/_bounding_boxes.py | 2 +- torchvision/tv_tensors/_image.py | 2 +- torchvision/tv_tensors/_mask.py | 2 +- torchvision/tv_tensors/_video.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index 56e77c2a85e..62997040c66 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -25,7 +25,7 @@ class BoundingBoxFormat(Enum): class BoundingBoxes(TVTensor): - """:class:`torch.Tensor` subclass for bounding boxes. + """:class:`torch.Tensor` subclass for bounding boxes with a shape of ``(N, 4)``. .. note:: There should be only one :class:`~torchvision.tv_tensors.BoundingBoxes` diff --git a/torchvision/tv_tensors/_image.py b/torchvision/tv_tensors/_image.py index c2f82c8d0df..c920f98fe78 100644 --- a/torchvision/tv_tensors/_image.py +++ b/torchvision/tv_tensors/_image.py @@ -9,7 +9,7 @@ class Image(TVTensor): - """:class:`torch.Tensor` subclass for images. + """:class:`torch.Tensor` subclass for images with a minimum shape of ``(C x H x W)``. .. note:: diff --git a/torchvision/tv_tensors/_mask.py b/torchvision/tv_tensors/_mask.py index a8f6f4d62cb..82ccb38a4a9 100644 --- a/torchvision/tv_tensors/_mask.py +++ b/torchvision/tv_tensors/_mask.py @@ -9,7 +9,7 @@ class Mask(TVTensor): - """:class:`torch.Tensor` subclass for segmentation and detection masks. + """:class:`torch.Tensor` subclass for segmentation and detection masks with a minimum shape of ``(H x W)``. Args: data (tensor-like, PIL.Image.Image): Any data that can be turned into a tensor with :func:`torch.as_tensor` as diff --git a/torchvision/tv_tensors/_video.py b/torchvision/tv_tensors/_video.py index a0466b001ee..1f0c3317092 100644 --- a/torchvision/tv_tensors/_video.py +++ b/torchvision/tv_tensors/_video.py @@ -8,7 +8,7 @@ class Video(TVTensor): - """:class:`torch.Tensor` subclass for videos. + """:class:`torch.Tensor` subclass for videos with a minimum shape of ``(T x C x H x W)``. Args: data (tensor-like): Any data that can be turned into a tensor with :func:`torch.as_tensor`. From 2cacb30218d493905b13c8bc76e5e2db1aebf112 Mon Sep 17 00:00:00 2001 From: mantasu Date: Wed, 7 Feb 2024 11:13:26 +0000 Subject: [PATCH 2/7] Allow to_image to handle image paths --- torchvision/transforms/v2/_type_conversion.py | 8 ++++---- torchvision/transforms/v2/functional/_type_conversion.py | 9 +++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/torchvision/transforms/v2/_type_conversion.py b/torchvision/transforms/v2/_type_conversion.py index 7c7439b1d02..3ed72ca070f 100644 --- a/torchvision/transforms/v2/_type_conversion.py +++ b/torchvision/transforms/v2/_type_conversion.py @@ -25,16 +25,16 @@ def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Ten class ToImage(Transform): - """Convert a tensor, ndarray, or PIL Image to :class:`~torchvision.tv_tensors.Image` - ; this does not scale values. + """Convert a tensor, ndarray, PIL Image, or string representing image path to + :class:`~torchvision.tv_tensors.Image`; this does not scale values. This transform does not support torchscript. 
""" - _transformed_types = (is_pure_tensor, PIL.Image.Image, np.ndarray) + _transformed_types = (is_pure_tensor, PIL.Image.Image, np.ndarray, str) def _transform( - self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: Dict[str, Any] + self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray, str], params: Dict[str, Any] ) -> tv_tensors.Image: return F.to_image(inpt) diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py index c5a731fe143..7087666c81b 100644 --- a/torchvision/transforms/v2/functional/_type_conversion.py +++ b/torchvision/transforms/v2/functional/_type_conversion.py @@ -6,9 +6,11 @@ from torchvision import tv_tensors from torchvision.transforms import functional as _F +from ....io import read_image + @torch.jit.unused -def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray]) -> tv_tensors.Image: +def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray, str]) -> tv_tensors.Image: """See :class:`~torchvision.transforms.v2.ToImage` for details.""" if isinstance(inpt, np.ndarray): output = torch.from_numpy(np.atleast_3d(inpt)).permute((2, 0, 1)).contiguous() @@ -16,9 +18,12 @@ def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray]) -> tv_tenso output = pil_to_tensor(inpt) elif isinstance(inpt, torch.Tensor): output = inpt + elif isinstance(inpt, str): + output = read_image(inpt) else: raise TypeError( - f"Input can either be a pure Tensor, a numpy array, or a PIL image, but got {type(inpt)} instead." + f"Input can either be a pure Tensor, a numpy array, a PIL image, " + f"or a string representing image path, but got {type(inpt)} instead." ) return tv_tensors.Image(output) From 1fbcfc525aabd36ae981d92462d452a6ee75f1d7 Mon Sep 17 00:00:00 2001 From: mantasu Date: Wed, 7 Feb 2024 11:59:57 +0000 Subject: [PATCH 3/7] Add test_image_file for TestToImage --- test/test_transforms_v2.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 458f83f01c3..6d22e59d0ba 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -8,6 +8,7 @@ import random import re import sys +import tempfile from copy import deepcopy from pathlib import Path from unittest import mock @@ -1106,9 +1107,11 @@ def test_kernel_image(self, param, value, dtype, device): make_image(dtype=dtype, device=device), **{param: value}, check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), - check_cuda_vs_cpu=dict(atol=1, rtol=0) - if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR - else True, + check_cuda_vs_cpu=( + dict(atol=1, rtol=0) + if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR + else True + ), ) @param_value_parametrization( @@ -5182,13 +5185,28 @@ def test_functional_and_transform(self, make_input, fn): if isinstance(input, torch.Tensor): assert output.data_ptr() == input.data_ptr() + @pytest.mark.parametrize("fn", [F.to_image, transform_cls_to_functional(transforms.ToImage)]) + def test_image_file(self, fn): + # Non-regression test for https://github.com/pytorch/vision/issues/8261 + img_np = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8) + temp_file = tempfile.NamedTemporaryFile(suffix=".jpg", delete=True) + PIL.Image.fromarray(img_np).save(temp_file.name) + + output = fn(temp_file.name) + assert 
isinstance(output, tv_tensors.Image) + assert F.get_size(output) == list(img_np.shape[:2]) + + temp_file.close() + def test_2d_np_array(self): # Non-regression test for https://github.com/pytorch/vision/issues/8255 input = np.random.rand(10, 10) assert F.to_image(input).shape == (1, 10, 10) def test_functional_error(self): - with pytest.raises(TypeError, match="Input can either be a pure Tensor, a numpy array, or a PIL image"): + with pytest.raises( + TypeError, match="Input can either be a pure Tensor, a numpy array, a PIL image, or a string path" + ): F.to_image(object()) From 4c1bde21688e22b3b781d313bb45827dfdfbb724 Mon Sep 17 00:00:00 2001 From: mantasu Date: Wed, 7 Feb 2024 14:03:02 +0000 Subject: [PATCH 4/7] Replace relative import with absolute --- torchvision/transforms/v2/functional/_type_conversion.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py index 7087666c81b..46392bf8f1e 100644 --- a/torchvision/transforms/v2/functional/_type_conversion.py +++ b/torchvision/transforms/v2/functional/_type_conversion.py @@ -4,10 +4,9 @@ import PIL.Image import torch from torchvision import tv_tensors +from torchvision.io import read_image from torchvision.transforms import functional as _F -from ....io import read_image - @torch.jit.unused def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray, str]) -> tv_tensors.Image: From e16b71ca877ad2bf26a8c39f1cd9d190217777d8 Mon Sep 17 00:00:00 2001 From: mantasu Date: Tue, 5 Mar 2024 14:11:21 +0000 Subject: [PATCH 5/7] Revert "Add test_image_file for TestToImage" (see #8261) This reverts commit 1fbcfc525aabd36ae981d92462d452a6ee75f1d7. --- test/test_transforms_v2.py | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index 731d855d51a..0fb3ee6c11f 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -8,7 +8,6 @@ import random import re import sys -import tempfile from copy import deepcopy from pathlib import Path from unittest import mock @@ -1107,11 +1106,9 @@ def test_kernel_image(self, param, value, dtype, device): make_image(dtype=dtype, device=device), **{param: value}, check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), - check_cuda_vs_cpu=( - dict(atol=1, rtol=0) - if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR - else True - ), + check_cuda_vs_cpu=dict(atol=1, rtol=0) + if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR + else True, ) @param_value_parametrization( @@ -5202,28 +5199,13 @@ def test_functional_and_transform(self, make_input, fn): if isinstance(input, torch.Tensor): assert output.data_ptr() == input.data_ptr() - @pytest.mark.parametrize("fn", [F.to_image, transform_cls_to_functional(transforms.ToImage)]) - def test_image_file(self, fn): - # Non-regression test for https://github.com/pytorch/vision/issues/8261 - img_np = np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8) - temp_file = tempfile.NamedTemporaryFile(suffix=".jpg", delete=True) - PIL.Image.fromarray(img_np).save(temp_file.name) - - output = fn(temp_file.name) - assert isinstance(output, tv_tensors.Image) - assert F.get_size(output) == list(img_np.shape[:2]) - - temp_file.close() - def test_2d_np_array(self): # 
Non-regression test for https://github.com/pytorch/vision/issues/8255 input = np.random.rand(10, 10) assert F.to_image(input).shape == (1, 10, 10) def test_functional_error(self): - with pytest.raises( - TypeError, match="Input can either be a pure Tensor, a numpy array, a PIL image, or a string path" - ): + with pytest.raises(TypeError, match="Input can either be a pure Tensor, a numpy array, or a PIL image"): F.to_image(object()) From 9324a4f725fbeb036905e0bd526a92b3de0a7958 Mon Sep 17 00:00:00 2001 From: mantasu Date: Tue, 5 Mar 2024 14:16:41 +0000 Subject: [PATCH 6/7] Revert "Allow to_image to handle image paths" (see #8261) This reverts commit 2cacb30218d493905b13c8bc76e5e2db1aebf112. --- torchvision/transforms/v2/_type_conversion.py | 8 ++++---- torchvision/transforms/v2/functional/_type_conversion.py | 7 ++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/torchvision/transforms/v2/_type_conversion.py b/torchvision/transforms/v2/_type_conversion.py index 3ed72ca070f..7c7439b1d02 100644 --- a/torchvision/transforms/v2/_type_conversion.py +++ b/torchvision/transforms/v2/_type_conversion.py @@ -25,16 +25,16 @@ def _transform(self, inpt: PIL.Image.Image, params: Dict[str, Any]) -> torch.Ten class ToImage(Transform): - """Convert a tensor, ndarray, PIL Image, or string representing image path to - :class:`~torchvision.tv_tensors.Image`; this does not scale values. + """Convert a tensor, ndarray, or PIL Image to :class:`~torchvision.tv_tensors.Image` + ; this does not scale values. This transform does not support torchscript. """ - _transformed_types = (is_pure_tensor, PIL.Image.Image, np.ndarray, str) + _transformed_types = (is_pure_tensor, PIL.Image.Image, np.ndarray) def _transform( - self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray, str], params: Dict[str, Any] + self, inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray], params: Dict[str, Any] ) -> tv_tensors.Image: return F.to_image(inpt) diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py index 46392bf8f1e..089a751e05d 100644 --- a/torchvision/transforms/v2/functional/_type_conversion.py +++ b/torchvision/transforms/v2/functional/_type_conversion.py @@ -9,7 +9,7 @@ @torch.jit.unused -def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray, str]) -> tv_tensors.Image: +def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray]) -> tv_tensors.Image: """See :class:`~torchvision.transforms.v2.ToImage` for details.""" if isinstance(inpt, np.ndarray): output = torch.from_numpy(np.atleast_3d(inpt)).permute((2, 0, 1)).contiguous() @@ -17,12 +17,9 @@ def to_image(inpt: Union[torch.Tensor, PIL.Image.Image, np.ndarray, str]) -> tv_ output = pil_to_tensor(inpt) elif isinstance(inpt, torch.Tensor): output = inpt - elif isinstance(inpt, str): - output = read_image(inpt) else: raise TypeError( - f"Input can either be a pure Tensor, a numpy array, a PIL image, " - f"or a string representing image path, but got {type(inpt)} instead." + f"Input can either be a pure Tensor, a numpy array, or a PIL image, but got {type(inpt)} instead." 
) return tv_tensors.Image(output) From 1cfe17df3300ba9057507fc442a481e731d52272 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 19 Apr 2024 12:56:07 +0100 Subject: [PATCH 7/7] minor changes --- torchvision/transforms/v2/functional/_type_conversion.py | 1 - torchvision/tv_tensors/_bounding_boxes.py | 2 +- torchvision/tv_tensors/_image.py | 2 +- torchvision/tv_tensors/_mask.py | 2 +- torchvision/tv_tensors/_video.py | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/torchvision/transforms/v2/functional/_type_conversion.py b/torchvision/transforms/v2/functional/_type_conversion.py index 089a751e05d..c5a731fe143 100644 --- a/torchvision/transforms/v2/functional/_type_conversion.py +++ b/torchvision/transforms/v2/functional/_type_conversion.py @@ -4,7 +4,6 @@ import PIL.Image import torch from torchvision import tv_tensors -from torchvision.io import read_image from torchvision.transforms import functional as _F diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index 62997040c66..ea02fa3dc7b 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -25,7 +25,7 @@ class BoundingBoxFormat(Enum): class BoundingBoxes(TVTensor): - """:class:`torch.Tensor` subclass for bounding boxes with a shape of ``(N, 4)``. + """:class:`torch.Tensor` subclass for bounding boxes with shape ``[N, 4]``. .. note:: There should be only one :class:`~torchvision.tv_tensors.BoundingBoxes` diff --git a/torchvision/tv_tensors/_image.py b/torchvision/tv_tensors/_image.py index c920f98fe78..2a0a2ec7209 100644 --- a/torchvision/tv_tensors/_image.py +++ b/torchvision/tv_tensors/_image.py @@ -9,7 +9,7 @@ class Image(TVTensor): - """:class:`torch.Tensor` subclass for images with a minimum shape of ``(C x H x W)``. + """:class:`torch.Tensor` subclass for images with shape ``[..., C, H, W]``. .. note:: diff --git a/torchvision/tv_tensors/_mask.py b/torchvision/tv_tensors/_mask.py index 82ccb38a4a9..ef9d96159fb 100644 --- a/torchvision/tv_tensors/_mask.py +++ b/torchvision/tv_tensors/_mask.py @@ -9,7 +9,7 @@ class Mask(TVTensor): - """:class:`torch.Tensor` subclass for segmentation and detection masks with a minimum shape of ``(H x W)``. + """:class:`torch.Tensor` subclass for segmentation and detection masks with shape ``[..., H, W]``. Args: data (tensor-like, PIL.Image.Image): Any data that can be turned into a tensor with :func:`torch.as_tensor` as diff --git a/torchvision/tv_tensors/_video.py b/torchvision/tv_tensors/_video.py index 1f0c3317092..aa923e781ef 100644 --- a/torchvision/tv_tensors/_video.py +++ b/torchvision/tv_tensors/_video.py @@ -8,7 +8,7 @@ class Video(TVTensor): - """:class:`torch.Tensor` subclass for videos with a minimum shape of ``(T x C x H x W)``. + """:class:`torch.Tensor` subclass for videos with shape ``[..., T, C, H, W]``. Args: data (tensor-like): Any data that can be turned into a tensor with :func:`torch.as_tensor`.
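
For reference, below is a minimal usage sketch of the conventions the series ends up with after the reverts (``Image`` is ``[..., C, H, W]``, ``Mask`` is ``[..., H, W]``, ``Video`` is ``[..., T, C, H, W]``, ``BoundingBoxes`` is ``[N, 4]``, and ``to_image`` accepts only a tensor, ndarray, or PIL image). This is an illustrative sketch, not part of the patches, and assumes a torchvision build that ships the ``tv_tensors`` and ``transforms.v2`` namespaces:

import numpy as np
import torch
from torchvision import tv_tensors
from torchvision.transforms.v2 import functional as F

# Image: shape [..., C, H, W]
img = tv_tensors.Image(torch.rand(3, 32, 32))

# BoundingBoxes: shape [N, 4]; format and canvas_size are required metadata
boxes = tv_tensors.BoundingBoxes(
    torch.tensor([[0, 0, 10, 10], [5, 5, 20, 20]]),
    format="XYXY",
    canvas_size=(32, 32),
)

# Mask: shape [..., H, W]
mask = tv_tensors.Mask(torch.zeros(32, 32, dtype=torch.uint8))

# Video: shape [..., T, C, H, W]
video = tv_tensors.Video(torch.rand(8, 3, 32, 32))

# to_image takes a pure tensor, a numpy array, or a PIL image (string paths
# are not accepted after the revert); an HWC ndarray becomes a CHW
# tv_tensors.Image without value scaling.
img_from_np = F.to_image(np.random.randint(0, 256, (10, 10, 3), dtype=np.uint8))

print(img.shape, boxes.shape, mask.shape, video.shape, img_from_np.shape)
# torch.Size([3, 32, 32]) torch.Size([2, 4]) torch.Size([32, 32])
# torch.Size([8, 3, 32, 32]) torch.Size([3, 10, 10])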