Skip to content

Commit a4bc6b3

Browse files
authored
Merge branch 'main' into randint_int
2 parents 27a3e91 + dd66a9d commit a4bc6b3

File tree

9 files changed

+123
-61
lines changed

9 files changed

+123
-61
lines changed

test/test_image.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,15 @@ def test_decode_jpeg(img_path, pil_mode, mode):
100100
assert abs_mean_diff < 2
101101

102102

103+
@pytest.mark.parametrize("codec", ["png", "jpeg"])
103104
@pytest.mark.parametrize("orientation", [1, 2, 3, 4, 5, 6, 7, 8, 0])
104-
def test_decode_jpeg_with_exif_orientation(tmpdir, orientation):
105-
fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.jpg")
105+
def test_decode_with_exif_orientation(tmpdir, codec, orientation):
106+
fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.{codec}")
106107
t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
107108
im = F.to_pil_image(t)
108109
exif = im.getexif()
109110
exif[0x0112] = orientation # set exif orientation
110-
im.save(fp, "JPEG", exif=exif.tobytes())
111+
im.save(fp, codec.upper(), exif=exif.tobytes())
111112

112113
data = read_file(fp)
113114
output = decode_image(data, apply_exif_orientation=True)

torchvision/csrc/io/image/cpu/decode_image.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ torch::Tensor decode_image(
2727
if (memcmp(jpeg_signature, datap, 3) == 0) {
2828
return decode_jpeg(data, mode, apply_exif_orientation);
2929
} else if (memcmp(png_signature, datap, 4) == 0) {
30-
return decode_png(data, mode);
30+
return decode_png(
31+
data, mode, /*allow_16_bits=*/false, apply_exif_orientation);
3132
} else {
3233
TORCH_CHECK(
3334
false,

torchvision/csrc/io/image/cpu/decode_jpeg.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ torch::Tensor decode_jpeg(
203203

204204
int exif_orientation = -1;
205205
if (apply_exif_orientation) {
206-
exif_orientation = fetch_exif_orientation(&cinfo);
206+
exif_orientation = fetch_jpeg_exif_orientation(&cinfo);
207207
}
208208

209209
jpeg_start_decompress(&cinfo);

torchvision/csrc/io/image/cpu/decode_png.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
#include "decode_png.h"
22
#include "common_png.h"
3+
#include "exif.h"
34

45
namespace vision {
56
namespace image {
67

8+
using namespace exif_private;
9+
710
#if !PNG_FOUND
811
torch::Tensor decode_png(
912
const torch::Tensor& data,
1013
ImageReadMode mode,
11-
bool allow_16_bits) {
14+
bool allow_16_bits,
15+
bool apply_exif_orientation) {
1216
TORCH_CHECK(
1317
false, "decode_png: torchvision not compiled with libPNG support");
1418
}
@@ -22,7 +26,8 @@ bool is_little_endian() {
2226
torch::Tensor decode_png(
2327
const torch::Tensor& data,
2428
ImageReadMode mode,
25-
bool allow_16_bits) {
29+
bool allow_16_bits,
30+
bool apply_exif_orientation) {
2631
C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.decode_png.decode_png");
2732
// Check that the input tensor dtype is uint8
2833
TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor");
@@ -234,8 +239,19 @@ torch::Tensor decode_png(
234239
t_ptr = tensor.accessor<int32_t, 3>().data();
235240
}
236241
}
242+
243+
int exif_orientation = -1;
244+
if (apply_exif_orientation) {
245+
exif_orientation = fetch_png_exif_orientation(png_ptr, info_ptr);
246+
}
247+
237248
png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
238-
return tensor.permute({2, 0, 1});
249+
250+
auto output = tensor.permute({2, 0, 1});
251+
if (apply_exif_orientation) {
252+
return exif_orientation_transform(output, exif_orientation);
253+
}
254+
return output;
239255
}
240256
#endif
241257

torchvision/csrc/io/image/cpu/decode_png.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@ namespace image {
99
C10_EXPORT torch::Tensor decode_png(
1010
const torch::Tensor& data,
1111
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED,
12-
bool allow_16_bits = false);
12+
bool allow_16_bits = false,
13+
bool apply_exif_orientation = false);
1314

1415
} // namespace image
1516
} // namespace vision

torchvision/csrc/io/image/cpu/exif.h

Lines changed: 83 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,12 @@ direct,
5151
// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
5252

5353
#if JPEG_FOUND
54-
5554
#include <jpeglib.h>
55+
#endif
56+
#if PNG_FOUND
57+
#include <png.h>
58+
#endif
59+
5660
#include <torch/types.h>
5761

5862
namespace vision {
@@ -126,8 +130,48 @@ inline uint32_t get_uint32(
126130
(exif_data[offset + 2] << 8) + exif_data[offset + 3];
127131
}
128132

129-
inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
133+
inline int fetch_exif_orientation(unsigned char* exif_data_ptr, size_t size) {
130134
int exif_orientation = -1;
135+
136+
// Exif binary structure looks like this
137+
// First 6 bytes: [E, x, i, f, 0, 0]
138+
// Endianness, 2 bytes : [M, M] or [I, I]
139+
// Tag mark, 2 bytes: [0, 0x2a]
140+
// Offset, 4 bytes
141+
// Num entries, 2 bytes
142+
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
143+
// For more details:
144+
// http://www.media.mit.edu/pia/Research/deepview/exif.html
145+
146+
ExifDataReader exif_data(exif_data_ptr, size);
147+
auto endianness = get_endianness(exif_data);
148+
149+
// Checking whether Tag Mark (0x002A) correspond to one contained in the
150+
// Jpeg file
151+
uint16_t tag_mark = get_uint16(exif_data, endianness, 2);
152+
if (tag_mark == REQ_EXIF_TAG_MARK) {
153+
auto offset = get_uint32(exif_data, endianness, 4);
154+
size_t num_entry = get_uint16(exif_data, endianness, offset);
155+
offset += 2; // go to start of tag fields
156+
constexpr size_t tiff_field_size = 12;
157+
for (size_t entry = 0; entry < num_entry; entry++) {
158+
// Here we just search for orientation tag and parse it
159+
auto tag_num = get_uint16(exif_data, endianness, offset);
160+
if (tag_num == INCORRECT_TAG) {
161+
break;
162+
}
163+
if (tag_num == ORIENTATION_EXIF_TAG) {
164+
exif_orientation = get_uint16(exif_data, endianness, offset + 8);
165+
break;
166+
}
167+
offset += tiff_field_size;
168+
}
169+
}
170+
return exif_orientation;
171+
}
172+
173+
#if JPEG_FOUND
174+
inline int fetch_jpeg_exif_orientation(j_decompress_ptr cinfo) {
131175
// Check for Exif marker APP1
132176
jpeg_saved_marker_ptr exif_marker = 0;
133177
jpeg_saved_marker_ptr cmarker = cinfo->marker_list;
@@ -138,51 +182,45 @@ inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
138182
cmarker = cmarker->next;
139183
}
140184

141-
if (exif_marker) {
142-
// Exif binary structure looks like this
143-
// First 6 bytes: [E, x, i, f, 0, 0]
144-
// Endianness, 2 bytes : [M, M] or [I, I]
145-
// Tag mark, 2 bytes: [0, 0x2a]
146-
// Offset, 4 bytes
147-
// Num entries, 2 bytes
148-
// Tag entries and data, tag has 2 bytes and its data has 10 bytes
149-
// For more details:
150-
// http://www.media.mit.edu/pia/Research/deepview/exif.html
151-
152-
// Bytes from Exif size field to the first TIFF header
153-
constexpr size_t start_offset = 6;
154-
if (exif_marker->data_length > start_offset) {
155-
auto* exif_data_ptr = exif_marker->data + start_offset;
156-
auto size = exif_marker->data_length - start_offset;
157-
158-
ExifDataReader exif_data(exif_data_ptr, size);
159-
auto endianness = get_endianness(exif_data);
160-
161-
// Checking whether Tag Mark (0x002A) correspond to one contained in the
162-
// Jpeg file
163-
uint16_t tag_mark = get_uint16(exif_data, endianness, 2);
164-
if (tag_mark == REQ_EXIF_TAG_MARK) {
165-
auto offset = get_uint32(exif_data, endianness, 4);
166-
size_t num_entry = get_uint16(exif_data, endianness, offset);
167-
offset += 2; // go to start of tag fields
168-
constexpr size_t tiff_field_size = 12;
169-
for (size_t entry = 0; entry < num_entry; entry++) {
170-
// Here we just search for orientation tag and parse it
171-
auto tag_num = get_uint16(exif_data, endianness, offset);
172-
if (tag_num == INCORRECT_TAG) {
173-
break;
174-
}
175-
if (tag_num == ORIENTATION_EXIF_TAG) {
176-
exif_orientation = get_uint16(exif_data, endianness, offset + 8);
177-
break;
178-
}
179-
offset += tiff_field_size;
180-
}
181-
}
182-
}
185+
if (!exif_marker) {
186+
return -1;
183187
}
184-
return exif_orientation;
188+
189+
constexpr size_t start_offset = 6;
190+
if (exif_marker->data_length <= start_offset) {
191+
return -1;
192+
}
193+
194+
auto* exif_data_ptr = exif_marker->data + start_offset;
195+
auto size = exif_marker->data_length - start_offset;
196+
197+
return fetch_exif_orientation(exif_data_ptr, size);
198+
}
199+
#else // #if JPEG_FOUND
200+
inline int fetch_jpeg_exif_orientation(j_decompress_ptr cinfo) {
201+
return -1;
202+
}
203+
#endif // #if JPEG_FOUND
204+
205+
#if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
// Reads the EXIF orientation from the eXIf chunk of an already-parsed PNG.
//
// `png_ptr` / `info_ptr` are the libpng read and info structs of the image
// being decoded. Returns the value found by fetch_exif_orientation, or -1
// when the image carries no (or an empty) eXIf chunk.
inline int fetch_png_exif_orientation(png_structp png_ptr, png_infop info_ptr) {
  png_uint_32 num_exif = 0;
  png_bytep exif = nullptr;

  // Exif info could be in info_ptr
  if (png_get_valid(png_ptr, info_ptr, PNG_INFO_eXIf)) {
    png_get_eXIf_1(png_ptr, info_ptr, &num_exif, &exif);
  }

  if (exif && num_exif > 0) {
    return fetch_exif_orientation(exif, num_exif);
  }

  // No usable eXIf data: report "unknown" explicitly instead of flowing off
  // the end of a non-void function, which is undefined behavior.
  return -1;
}
#elif PNG_FOUND // libpng is available but was built without eXIf support
// Fallback used when libpng cannot expose eXIf data: orientation is unknown.
// Guarded by PNG_FOUND because png_structp / png_infop are only declared when
// <png.h> has been included.
inline int fetch_png_exif_orientation(png_structp png_ptr, png_infop info_ptr) {
  return -1;
}
#endif // #if PNG_FOUND && defined(PNG_eXIf_SUPPORTED)
186224

187225
constexpr uint16_t IMAGE_ORIENTATION_TL = 1; // normal orientation
188226
constexpr uint16_t IMAGE_ORIENTATION_TR = 2; // needs horizontal flip
@@ -223,5 +261,3 @@ inline torch::Tensor exif_orientation_transform(
223261
} // namespace exif_private
224262
} // namespace image
225263
} // namespace vision
226-
227-
#endif

torchvision/csrc/io/image/image.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ namespace image {
2121

2222
static auto registry =
2323
torch::RegisterOperators()
24-
.op("image::decode_png", &decode_png)
24+
.op("image::decode_png(Tensor data, int mode, bool allow_16_bits = False, bool apply_exif_orientation=False) -> Tensor",
25+
&decode_png)
2526
.op("image::encode_png", &encode_png)
2627
.op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor",
2728
&decode_jpeg)

torchvision/io/image.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ def write_file(filename: str, data: torch.Tensor) -> None:
6767
torch.ops.image.write_file(filename, data)
6868

6969

70-
def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
70+
def decode_png(
71+
input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED, apply_exif_orientation: bool = False
72+
) -> torch.Tensor:
7173
"""
7274
Decodes a PNG image into a 3 dimensional RGB or grayscale Tensor.
7375
Optionally converts the image to the desired format.
@@ -80,13 +82,15 @@ def decode_png(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGE
8082
converting the image. Default: ``ImageReadMode.UNCHANGED``.
8183
See `ImageReadMode` class for more information on various
8284
available modes.
85+
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
86+
Default: False.
8387
8488
Returns:
8589
output (Tensor[image_channels, image_height, image_width])
8690
"""
8791
if not torch.jit.is_scripting() and not torch.jit.is_tracing():
8892
_log_api_usage_once(decode_png)
89-
output = torch.ops.image.decode_png(input, mode.value, False)
93+
output = torch.ops.image.decode_png(input, mode.value, False, apply_exif_orientation)
9094
return output
9195

9296

@@ -235,7 +239,7 @@ def decode_image(
235239
See ``ImageReadMode`` class for more information on various
236240
available modes.
237241
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
238-
Default: False. Only implemented for JPEG format
242+
Default: False.
239243
240244
Returns:
241245
output (Tensor[image_channels, image_height, image_width])
@@ -261,7 +265,7 @@ def read_image(
261265
See ``ImageReadMode`` class for more information on various
262266
available modes.
263267
apply_exif_orientation (bool): apply EXIF orientation transformation to the output tensor.
264-
Default: False. Only implemented for JPEG format
268+
Default: False.
265269
266270
Returns:
267271
output (Tensor[image_channels, image_height, image_width])

torchvision/transforms/v2/_deprecated.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class ToTensor(Transform):
1717
.. warning::
1818
:class:`v2.ToTensor` is deprecated and will be removed in a future release.
1919
Please use instead ``v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])``.
20+
Output is equivalent up to float precision.
2021
2122
This transform does not support torchscript.
2223
@@ -41,6 +42,7 @@ def __init__(self) -> None:
4142
warnings.warn(
4243
"The transform `ToTensor()` is deprecated and will be removed in a future release. "
4344
"Instead, please use `v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])`."
45+
"Output is equivalent up to float precision."
4446
)
4547
super().__init__()
4648

0 commit comments

Comments
 (0)