From 5ce7cc7336b967e812f538c65be5c35105f25671 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Thu, 16 Jan 2020 20:16:27 +0100 Subject: [PATCH 01/10] initial fix --- torchvision/transforms/functional.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 8ae75f84c5b..7602be11d7d 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -719,16 +719,33 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=0): .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters """ + NUM_BANDS = { + "1": 1, + "L": 1, + "RGB": 3, + "RGBA": 4, + } + + def verify_fill(fill, num_bands): + is_scalar = isinstance(fill, (int, float)) + if (num_bands == 1 and is_scalar) or (num_bands > 1 and num_bands == len(fill)): + return fill + if num_bands > 1 and is_scalar: + return tuple([fill] * num_bands) + # TODO: add message + raise ValueError if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - if isinstance(fill, int): - fill = tuple([fill] * 3) + fill = verify_fill(fill, NUM_BANDS[img.mode]) return img.rotate(angle, resample, expand, center, fillcolor=fill) +# his should be a single integer or floating point value +# for single-band modes, and a tuple for multi-band modes (one value per band). + def _get_inverse_affine_matrix(center, angle, translate, scale, shear): # Helper method to compute inverse matrix for affine transformation From 17bdbd153f10f9056edcd080d529f5d23212e8cf Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Sat, 18 Jan 2020 11:57:56 +0100 Subject: [PATCH 02/10] outsourced num bands lookup --- torchvision/transforms/functional.py | 47 +++++++++++++--------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 7602be11d7d..1e6a0fdb7b4 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -28,6 +28,19 @@ def _is_pil_image(img): return isinstance(img, Image.Image) +def _pil_num_bands(img): + # for a list of all available modes see + # https://pillow.readthedocs.io/en/latest/handbook/concepts.html#modes + if img.mode in ("1", "L", "P", "I", "F", "I;16", "I;16L", "I;16B", "I;16N"): + return 1 + elif img.mode in ("LA", "PA", "La"): + return 2 + elif img.mode in ("RGB", "YCbCr", "LAB", "HSV", "BGR;15", "BGR;16", "BGR;24", "BGR;32"): + return 3 + else: # img.mode in ("RGBA", "CMYK", "RGBX", "RGBa") + return 4 + + def _is_tensor_image(img): return torch.is_tensor(img) and img.ndimension() == 3 @@ -85,14 +98,8 @@ def to_tensor(pic): img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False)) else: img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) - # PIL image mode: L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK - if pic.mode == 'YCbCr': - nchannel = 3 - elif pic.mode == 'I;16': - nchannel = 1 - else: - nchannel = len(pic.mode) - img = img.view(pic.size[1], pic.size[0], nchannel) + + img = img.view(pic.size[1], pic.size[0], _pil_num_bands(pic)) # put it from HWC to CHW format # yikes, this transpose takes 80% of the loading time/CPU img = img.transpose(0, 1).transpose(0, 2).contiguous() @@ -719,33 +726,23 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=0): .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters """ - NUM_BANDS = { - "1": 1, - "L": 1, - "RGB": 3, - "RGBA": 4, - } - def verify_fill(fill, num_bands): - is_scalar = isinstance(fill, (int, float)) - if (num_bands == 1 and is_scalar) or (num_bands > 1 and num_bands == len(fill)): - return fill - if num_bands > 1 and is_scalar: + if isinstance(fill, (int, float)): return tuple([fill] * num_bands) - # TODO: add message - raise ValueError + else: + if len(fill) == num_bands: + return fill + # TODO: add message + raise ValueError if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - fill = verify_fill(fill, NUM_BANDS[img.mode]) + fill = verify_fill(fill, _pil_num_bands(img)) return img.rotate(angle, resample, expand, center, fillcolor=fill) -# his should be a single integer or floating point value -# for single-band modes, and a tuple for multi-band modes (one value per band). - def _get_inverse_affine_matrix(center, angle, translate, scale, shear): # Helper method to compute inverse matrix for affine transformation From 976d7d9b57255631020d74f9554b09f6f1c6a729 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Sat, 18 Jan 2020 12:07:46 +0100 Subject: [PATCH 03/10] fix doc --- torchvision/transforms/functional.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 1e6a0fdb7b4..4bff330596d 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -720,8 +720,9 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=0): center (2-tuple, optional): Optional center of rotation. Origin is the upper left corner. Default is the center of the image. - fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. - If int, it is used for all channels respectively. + fill (n-tuple or int or float): Pixel fill value for area outside the rotated + image. If int or float, the value is used for all bands respectively. + Defaults to 0. .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters @@ -732,8 +733,10 @@ def verify_fill(fill, num_bands): else: if len(fill) == num_bands: return fill - # TODO: add message - raise ValueError + + msg = ("The number of elements in 'fill' does not match the number of " + "bands of the image ({} != {})") + raise ValueError(msg.format(len(fill), num_bands)) if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) From 8842bf4208eb91401f418888576b411129cd3777 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Sat, 18 Jan 2020 12:16:15 +0100 Subject: [PATCH 04/10] added pillow version requirement --- torchvision/transforms/functional.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 4bff330596d..c082fc8b019 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -703,7 +703,7 @@ def adjust_gamma(img, gamma, gain=1): return img -def rotate(img, angle, resample=False, expand=False, center=None, fill=0): +def rotate(img, angle, resample=False, expand=False, center=None, fill=None): """Rotate the image by angle. @@ -722,12 +722,20 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=0): Default is the center of the image. fill (n-tuple or int or float): Pixel fill value for area outside the rotated image. If int or float, the value is used for all bands respectively. - Defaults to 0. + Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``. .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters """ def verify_fill(fill, num_bands): + if PILLOW_VERSION < "5.2.0": + msg = ("The option to fill background area of the rotated image, " + "requires pillow>=5.2.0") + raise RuntimeError(msg) + + if fill is None: + fill = 0 + if isinstance(fill, (int, float)): return tuple([fill] * num_bands) else: From 6283dc3fe883c45e3749e3b01ca2d9261ff32d34 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Mon, 20 Jan 2020 11:39:55 +0100 Subject: [PATCH 05/10] simplify number of bands extraction --- torchvision/transforms/functional.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index c082fc8b019..53ff1815f38 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -28,19 +28,6 @@ def _is_pil_image(img): return isinstance(img, Image.Image) -def _pil_num_bands(img): - # for a list of all available modes see - # https://pillow.readthedocs.io/en/latest/handbook/concepts.html#modes - if img.mode in ("1", "L", "P", "I", "F", "I;16", "I;16L", "I;16B", "I;16N"): - return 1 - elif img.mode in ("LA", "PA", "La"): - return 2 - elif img.mode in ("RGB", "YCbCr", "LAB", "HSV", "BGR;15", "BGR;16", "BGR;24", "BGR;32"): - return 3 - else: # img.mode in ("RGBA", "CMYK", "RGBX", "RGBa") - return 4 - - def _is_tensor_image(img): return torch.is_tensor(img) and img.ndimension() == 3 @@ -99,7 +86,7 @@ def to_tensor(pic): else: img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) - img = img.view(pic.size[1], pic.size[0], _pil_num_bands(pic)) + img = img.view(pic.size[1], pic.size[0], len(pic.getbands())) # put it from HWC to CHW format # yikes, this transpose takes 80% of the loading time/CPU img = img.transpose(0, 1).transpose(0, 2).contiguous() @@ -749,7 +736,7 @@ def verify_fill(fill, num_bands): if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - fill = verify_fill(fill, _pil_num_bands(img)) + fill = verify_fill(fill, len(img.getbands())) return img.rotate(angle, resample, expand, center, fillcolor=fill) From 4c0dceb4e554285e07ab9f64014bf84be9970ccb Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 21 Jan 2020 22:51:28 +0100 Subject: [PATCH 06/10] remove unrelated change --- torchvision/transforms/functional.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 53ff1815f38..83b27bcb0c0 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -85,8 +85,14 @@ def to_tensor(pic): img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False)) else: img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) - - img = img.view(pic.size[1], pic.size[0], len(pic.getbands())) + # PIL image mode: L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK + if pic.mode == 'YCbCr': + nchannel = 3 + elif pic.mode == 'I;16': + nchannel = 1 + else: + nchannel = len(pic.mode) + img = img.view(pic.size[1], pic.size[0], nchannel) # put it from HWC to CHW format # yikes, this transpose takes 80% of the loading time/CPU img = img.transpose(0, 1).transpose(0, 2).contiguous() From 4bb4f900aa868773821e68d245a824b6932194a6 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 21 Jan 2020 23:00:15 +0100 Subject: [PATCH 07/10] remove indirect dependency on pillow>=5.2.0 --- torchvision/transforms/functional.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 83b27bcb0c0..f2d53d00f50 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -720,31 +720,32 @@ def rotate(img, angle, resample=False, expand=False, center=None, fill=None): .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters """ - def verify_fill(fill, num_bands): + def parse_fill(fill, num_bands): if PILLOW_VERSION < "5.2.0": - msg = ("The option to fill background area of the rotated image, " - "requires pillow>=5.2.0") - raise RuntimeError(msg) + if fill is None: + return {} + else: + msg = ("The option to fill background area of the rotated image, " + "requires pillow>=5.2.0") + raise RuntimeError(msg) if fill is None: fill = 0 - if isinstance(fill, (int, float)): - return tuple([fill] * num_bands) - else: - if len(fill) == num_bands: - return fill - + fill = tuple([fill] * num_bands) + if len(fill) != num_bands: msg = ("The number of elements in 'fill' does not match the number of " "bands of the image ({} != {})") raise ValueError(msg.format(len(fill), num_bands)) + return {"fillcolor", fill} + if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) - fill = verify_fill(fill, len(img.getbands())) + opts = parse_fill(fill, len(img.getbands())) - return img.rotate(angle, resample, expand, center, fillcolor=fill) + return img.rotate(angle, resample, expand, center, **opts) def _get_inverse_affine_matrix(center, angle, translate, scale, shear): From 80c4f8007738c0551ba2e2d473a46fc9070e19f5 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Tue, 21 Jan 2020 23:03:30 +0100 Subject: [PATCH 08/10] extend docstring to transform --- torchvision/transforms/transforms.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/torchvision/transforms/transforms.py b/torchvision/transforms/transforms.py index 3ec84aae84c..393e3c2db33 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -956,14 +956,15 @@ class RandomRotation(object): center (2-tuple, optional): Optional center of rotation. Origin is the upper left corner. Default is the center of the image. - fill (3-tuple or int): RGB pixel fill value for area outside the rotated image. - If int, it is used for all channels respectively. + fill (n-tuple or int or float): Pixel fill value for area outside the rotated + image. If int or float, the value is used for all bands respectively. + Defaults to 0 for all bands. This option is only available for ``pillow>=5.2.0``. .. _filters: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#filters """ - def __init__(self, degrees, resample=False, expand=False, center=None, fill=0): + def __init__(self, degrees, resample=False, expand=False, center=None, fill=None): if isinstance(degrees, numbers.Number): if degrees < 0: raise ValueError("If degrees is a single number, it must be positive.") From 69b167e83d6bb8ef05d1823c13ef4f4779d59533 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 22 Jan 2020 10:16:47 +0100 Subject: [PATCH 09/10] bug fix --- torchvision/transforms/functional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index f2d53d00f50..299b0203944 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -738,7 +738,7 @@ def parse_fill(fill, num_bands): "bands of the image ({} != {})") raise ValueError(msg.format(len(fill), num_bands)) - return {"fillcolor", fill} + return {"fillcolor": fill} if not _is_pil_image(img): raise TypeError('img should be PIL Image. Got {}'.format(type(img))) From f34208834f78adb443855b340f135b6cf7cb9670 Mon Sep 17 00:00:00 2001 From: Philip Meier Date: Wed, 22 Jan 2020 11:12:00 +0100 Subject: [PATCH 10/10] added test --- test/test_transforms.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/test/test_transforms.py b/test/test_transforms.py index a801360424c..3a76f3a1adb 100644 --- a/test/test_transforms.py +++ b/test/test_transforms.py @@ -1,5 +1,6 @@ from __future__ import division import os +import mock import torch import torchvision.transforms as transforms import torchvision.transforms.functional as F @@ -1074,6 +1075,26 @@ def test_rotate(self): self.assertTrue(np.all(np.array(result_a) == np.array(result_b))) + def test_rotate_fill(self): + img = F.to_pil_image(np.ones((100, 100, 3), dtype=np.uint8) * 255, "RGB") + + modes = ("L", "RGB") + nums_bands = [len(mode) for mode in modes] + fill = 127 + + for mode, num_bands in zip(modes, nums_bands): + img_conv = img.convert(mode) + img_rot = F.rotate(img_conv, 45.0, fill=fill) + pixel = img_rot.getpixel((0, 0)) + + if not isinstance(pixel, tuple): + pixel = (pixel,) + self.assertTupleEqual(pixel, tuple([fill] * num_bands)) + + for wrong_num_bands in set(nums_bands) - {num_bands}: + with self.assertRaises(ValueError): + F.rotate(img_conv, 45.0, fill=tuple([fill] * wrong_num_bands)) + def test_affine(self): input_img = np.zeros((40, 40, 3), dtype=np.uint8) pts = []