Skip to content

Commit f3298dc

Browse files
authored
Added support for EXIF orientation transform in read_image for JPEG (#8279)
1 parent bca7a49 commit f3298dc

File tree

8 files changed

+302
-15
lines changed

8 files changed

+302
-15
lines changed

test/test_image.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import torch
1010
import torchvision.transforms.functional as F
1111
from common_utils import assert_equal, needs_cuda
12-
from PIL import __version__ as PILLOW_VERSION, Image
12+
from PIL import __version__ as PILLOW_VERSION, Image, ImageOps
1313
from torchvision.io.image import (
1414
_read_png_16,
1515
decode_image,
@@ -100,6 +100,44 @@ def test_decode_jpeg(img_path, pil_mode, mode):
100100
assert abs_mean_diff < 2
101101

102102

103+
@pytest.mark.parametrize("orientation", [1, 2, 3, 4, 5, 6, 7, 8, 0])
def test_decode_jpeg_with_exif_orientation(tmpdir, orientation):
    """Check that decode_image(apply_exif_orientation=True) matches PIL.

    A random image is saved as JPEG with the Exif orientation tag (0x0112)
    set to ``orientation``. The tensor decoded by torchvision must equal the
    tensor obtained from PIL after ``ImageOps.exif_transpose``. The value 0
    is included in addition to the eight orientations 1-8.
    """
    fp = os.path.join(tmpdir, f"exif_oriented_{orientation}.jpg")
    # Non-square on purpose (256 x 257) so that a missing or wrong
    # transpose shows up as a shape mismatch.
    t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
    im = F.to_pil_image(t)
    exif = im.getexif()
    exif[0x0112] = orientation  # set exif orientation
    im.save(fp, "JPEG", exif=exif.tobytes())

    data = read_file(fp)
    output = decode_image(data, apply_exif_orientation=True)

    # Use a context manager so the file handle opened by PIL is released
    # even when the comparison below fails.
    with Image.open(fp) as pimg:
        expected = F.pil_to_tensor(ImageOps.exif_transpose(pimg))

    torch.testing.assert_close(expected, output)
120+
121+
122+
@pytest.mark.parametrize("size", [65533, 1, 7, 10, 23, 33])
def test_invalid_exif(tmpdir, size):
    """Check that a garbage Exif payload is handled the same way PIL does.

    The JPEG is written with ``size`` bytes of ``b"1"`` as its Exif data.
    Decoding with ``apply_exif_orientation=True`` must still produce the
    same tensor as PIL after ``ImageOps.exif_transpose``.
    """
    # Inspired from a PIL test:
    # https://github.com/python-pillow/Pillow/blob/8f63748e50378424628155994efd7e0739a4d1d1/Tests/test_file_jpeg.py#L299
    fp = os.path.join(tmpdir, "invalid_exif.jpg")
    t = torch.randint(0, 256, size=(3, 256, 257), dtype=torch.uint8)
    im = F.to_pil_image(t)
    im.save(fp, "JPEG", exif=b"1" * size)

    data = read_file(fp)
    output = decode_image(data, apply_exif_orientation=True)

    # Use a context manager so the file handle opened by PIL is released
    # even when the comparison below fails.
    with Image.open(fp) as pimg:
        expected = F.pil_to_tensor(ImageOps.exif_transpose(pimg))

    torch.testing.assert_close(expected, output)
139+
140+
103141
def test_decode_jpeg_errors():
104142
with pytest.raises(RuntimeError, match="Expected a non empty 1-dimensional tensor"):
105143
decode_jpeg(torch.empty((100, 1), dtype=torch.uint8))

torchvision/csrc/io/image/cpu/decode_image.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66
namespace vision {
77
namespace image {
88

9-
torch::Tensor decode_image(const torch::Tensor& data, ImageReadMode mode) {
9+
torch::Tensor decode_image(
10+
const torch::Tensor& data,
11+
ImageReadMode mode,
12+
bool apply_exif_orientation) {
1013
// Check that tensor is a CPU tensor
1114
TORCH_CHECK(data.device() == torch::kCPU, "Expected a CPU tensor");
1215
// Check that the input tensor dtype is uint8
@@ -22,7 +25,7 @@ torch::Tensor decode_image(const torch::Tensor& data, ImageReadMode mode) {
2225
const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG"
2326

2427
if (memcmp(jpeg_signature, datap, 3) == 0) {
25-
return decode_jpeg(data, mode);
28+
return decode_jpeg(data, mode, apply_exif_orientation);
2629
} else if (memcmp(png_signature, datap, 4) == 0) {
2730
return decode_png(data, mode);
2831
} else {

torchvision/csrc/io/image/cpu/decode_image.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ namespace image {
88

99
C10_EXPORT torch::Tensor decode_image(
1010
const torch::Tensor& data,
11-
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED);
11+
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED,
12+
bool apply_exif_orientation = false);
1213

1314
} // namespace image
1415
} // namespace vision

torchvision/csrc/io/image/cpu/decode_jpeg.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "decode_jpeg.h"
22
#include "common_jpeg.h"
3+
#include "exif.h"
34

45
namespace vision {
56
namespace image {
@@ -12,6 +13,7 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
1213
#else
1314

1415
using namespace detail;
16+
using namespace exif_private;
1517

1618
namespace {
1719

@@ -65,6 +67,8 @@ static void torch_jpeg_set_source_mgr(
6567
src->len = len;
6668
src->pub.bytes_in_buffer = len;
6769
src->pub.next_input_byte = src->data;
70+
71+
jpeg_save_markers(cinfo, APP1, 0xffff);
6872
}
6973

7074
inline unsigned char clamped_cmyk_rgb_convert(
@@ -121,7 +125,10 @@ void convert_line_cmyk_to_gray(
121125

122126
} // namespace
123127

124-
torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
128+
torch::Tensor decode_jpeg(
129+
const torch::Tensor& data,
130+
ImageReadMode mode,
131+
bool apply_exif_orientation) {
125132
C10_LOG_API_USAGE_ONCE(
126133
"torchvision.csrc.io.image.cpu.decode_jpeg.decode_jpeg");
127134
// Check that the input tensor dtype is uint8
@@ -191,6 +198,11 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
191198
jpeg_calc_output_dimensions(&cinfo);
192199
}
193200

201+
int exif_orientation = -1;
202+
if (apply_exif_orientation) {
203+
exif_orientation = fetch_exif_orientation(&cinfo);
204+
}
205+
194206
jpeg_start_decompress(&cinfo);
195207

196208
int height = cinfo.output_height;
@@ -227,7 +239,12 @@ torch::Tensor decode_jpeg(const torch::Tensor& data, ImageReadMode mode) {
227239

228240
jpeg_finish_decompress(&cinfo);
229241
jpeg_destroy_decompress(&cinfo);
230-
return tensor.permute({2, 0, 1});
242+
auto output = tensor.permute({2, 0, 1});
243+
244+
if (apply_exif_orientation) {
245+
return exif_orientation_transform(output, exif_orientation);
246+
}
247+
return output;
231248
}
232249
#endif // #if !JPEG_FOUND
233250

torchvision/csrc/io/image/cpu/decode_jpeg.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ namespace image {
88

99
C10_EXPORT torch::Tensor decode_jpeg(
1010
const torch::Tensor& data,
11-
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED);
11+
ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED,
12+
bool apply_exif_orientation = false);
1213

1314
C10_EXPORT int64_t _jpeg_version();
1415
C10_EXPORT bool _is_compiled_against_turbo();

torchvision/csrc/io/image/cpu/exif.h

Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
/*M///////////////////////////////////////////////////////////////////////////////////////
2+
//
3+
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4+
//
5+
// By downloading, copying, installing or using the software you agree to this
6+
license.
7+
// If you do not agree to this license, do not download, install,
8+
// copy or use the software.
9+
//
10+
//
11+
// License Agreement
12+
// For Open Source Computer Vision Library
13+
//
14+
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
15+
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
16+
// Third party copyrights are property of their respective owners.
17+
//
18+
// Redistribution and use in source and binary forms, with or without
19+
modification,
20+
// are permitted provided that the following conditions are met:
21+
//
22+
// * Redistribution's of source code must retain the above copyright notice,
23+
// this list of conditions and the following disclaimer.
24+
//
25+
// * Redistribution's in binary form must reproduce the above copyright
26+
notice,
27+
// this list of conditions and the following disclaimer in the documentation
28+
// and/or other materials provided with the distribution.
29+
//
30+
// * The name of the copyright holders may not be used to endorse or promote
31+
products
32+
// derived from this software without specific prior written permission.
33+
//
34+
// This software is provided by the copyright holders and contributors "as is"
35+
and
36+
// any express or implied warranties, including, but not limited to, the implied
37+
// warranties of merchantability and fitness for a particular purpose are
38+
disclaimed.
39+
// In no event shall the Intel Corporation or contributors be liable for any
40+
direct,
41+
// indirect, incidental, special, exemplary, or consequential damages
42+
// (including, but not limited to, procurement of substitute goods or services;
43+
// loss of use, data, or profits; or business interruption) however caused
44+
// and on any theory of liability, whether in contract, strict liability,
45+
// or tort (including negligence or otherwise) arising in any way out of
46+
// the use of this software, even if advised of the possibility of such damage.
47+
//
48+
//M*/
49+
#pragma once
50+
// Functions in this module are taken from OpenCV
51+
// https://github.com/opencv/opencv/blob/097891e311fae1d8354eb092a0fd0171e630d78c/modules/imgcodecs/src/exif.cpp
52+
53+
#include <jpeglib.h>
54+
#include <torch/types.h>
55+
#include <vector>
56+
57+
namespace vision {
58+
namespace image {
59+
namespace exif_private {
60+
61+
// Marker code of the JPEG APP1 segment, which is where Exif data is looked up.
constexpr uint16_t APP1 = 0xe1;
// Byte-order identifiers returned by get_endianness(): 'I' (0x49) for an
// "II" header (least significant byte first) and 'M' (0x4d) for an "MM"
// header (most significant byte first).
constexpr uint16_t ENDIANNESS_INTEL = 0x49;
constexpr uint16_t ENDIANNESS_MOTO = 0x4d;
// Tag mark expected right after the two byte-order bytes.
constexpr uint16_t REQ_EXIF_TAG_MARK = 0x2a;
// Tag id of the orientation entry.
constexpr uint16_t ORIENTATION_EXIF_TAG = 0x0112;
// Sentinel returned by get_uint16()/get_uint32() when the requested bytes do
// not fit in the buffer. Spelled out as 0xffff, which is the value the
// previous `-1` initializer wrapped to.
constexpr uint16_t INCORRECT_TAG = 0xffff;

// Identifies the byte order announced by the first two bytes of `exif_data`.
//
// Returns ENDIANNESS_INTEL for "II", ENDIANNESS_MOTO for "MM" and 0 for
// anything else, including buffers shorter than the two-byte mark.
inline uint16_t get_endianness(const std::vector<unsigned char>& exif_data) {
  // Both bytes of the mark must be present and identical.
  if (exif_data.size() < 2 || exif_data[0] != exif_data[1]) {
    return 0;
  }
  switch (exif_data[0]) {
    case 'I':
      return ENDIANNESS_INTEL;
    case 'M':
      return ENDIANNESS_MOTO;
    default:
      return 0;
  }
}

// Reads the 16-bit unsigned integer stored at `offset` in `exif_data`.
//
// `endianness` selects the byte order: ENDIANNESS_INTEL reads the least
// significant byte first, any other value reads the most significant byte
// first. Returns INCORRECT_TAG when bytes [offset, offset + 2) are not fully
// inside the buffer.
inline uint16_t get_uint16(
    const std::vector<unsigned char>& exif_data,
    uint16_t endianness,
    const size_t offset) {
  constexpr size_t width = 2;
  // Written as a subtraction so that a huge `offset` cannot wrap around and
  // slip past the check.
  if (exif_data.size() < width || offset > exif_data.size() - width) {
    return INCORRECT_TAG;
  }

  const uint16_t first = exif_data[offset];
  const uint16_t second = exif_data[offset + 1];
  if (endianness == ENDIANNESS_INTEL) {
    return static_cast<uint16_t>(first | (second << 8));
  }
  return static_cast<uint16_t>((first << 8) | second);
}

// Reads the 32-bit unsigned integer stored at `offset` in `exif_data`.
//
// `endianness` is interpreted as in get_uint16(). Returns INCORRECT_TAG
// (widened to 32 bits, i.e. 0x0000ffff) when bytes [offset, offset + 4) are
// not fully inside the buffer; the sentinel value is kept as-is so existing
// callers observe the same result as before.
inline uint32_t get_uint32(
    const std::vector<unsigned char>& exif_data,
    uint16_t endianness,
    const size_t offset) {
  constexpr size_t width = 4;
  // Overflow-safe bounds check, see get_uint16().
  if (exif_data.size() < width || offset > exif_data.size() - width) {
    return INCORRECT_TAG;
  }

  // Widen to uint32_t before shifting so that no bit is ever shifted into
  // the sign bit of a promoted int.
  const uint32_t b0 = exif_data[offset];
  const uint32_t b1 = exif_data[offset + 1];
  const uint32_t b2 = exif_data[offset + 2];
  const uint32_t b3 = exif_data[offset + 3];
  if (endianness == ENDIANNESS_INTEL) {
    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
  }
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
111+
112+
// Extracts the raw Exif orientation value from the APP1 markers saved on
// `cinfo`.
//
// Walks cinfo->marker_list and uses the first APP1 segment whose payload
// starts with the "Exif\0\0" identifier, so that another APP1 segment placed
// before the Exif one does not hide the orientation. The entries following
// the header are then scanned for ORIENTATION_EXIF_TAG.
//
// Returns -1 when no orientation could be read (no Exif segment, invalid
// byte-order mark, wrong tag mark, truncated data, tag not present).
// Otherwise returns the 16-bit value stored in the orientation entry; the
// value is not validated here.
//
// NOTE(review): marker_list presumably only contains APP1 segments if
// jpeg_save_markers(cinfo, APP1, ...) was called before the header was read
// -- confirm against the caller.
inline int fetch_exif_orientation(j_decompress_ptr cinfo) {
  constexpr int not_found = -1;

  // Exif binary structure looks like this
  // First 6 bytes: [E, x, i, f, 0, 0]
  // Endianness, 2 bytes : [M, M] or [I, I]
  // Tag mark, 2 bytes: [0, 0x2a]
  // Offset, 4 bytes
  // Num entries, 2 bytes
  // Tag entries and data, tag has 2 bytes and its data has 10 bytes
  // For more details:
  // http://www.media.mit.edu/pia/Research/deepview/exif.html

  // Bytes from Exif size field to the first TIFF header
  constexpr size_t start_offset = 6;
  // Byte-order mark (2) + tag mark (2) + offset of the entry table (4).
  constexpr size_t tiff_header_size = 8;

  // Look for the first APP1 segment that really is an Exif segment.
  jpeg_saved_marker_ptr exif_marker = nullptr;
  for (jpeg_saved_marker_ptr cmarker = cinfo->marker_list; cmarker != nullptr;
       cmarker = cmarker->next) {
    if (cmarker->marker != APP1 || cmarker->data_length <= start_offset) {
      continue;
    }
    const auto* id = cmarker->data;
    if (id[0] == 'E' && id[1] == 'x' && id[2] == 'i' && id[3] == 'f' &&
        id[4] == 0 && id[5] == 0) {
      exif_marker = cmarker;
      break;
    }
  }
  if (exif_marker == nullptr) {
    return not_found;
  }

  const auto* exif_data_ptr = exif_marker->data + start_offset;
  const size_t size = exif_marker->data_length - start_offset;
  if (size < tiff_header_size) {
    return not_found;
  }
  // Here we copy the data into the vector structure
  // TODO: we can avoid copying the data and read directly from the pointer
  const std::vector<unsigned char> exif_data_vec(
      exif_data_ptr, exif_data_ptr + size);

  // Refuse to guess a byte order when the mark is neither "II" nor "MM".
  const auto endianness = get_endianness(exif_data_vec);
  if (endianness != ENDIANNESS_INTEL && endianness != ENDIANNESS_MOTO) {
    return not_found;
  }

  // Checking whether Tag Mark (0x002A) correspond to one contained in the
  // Jpeg file
  if (get_uint16(exif_data_vec, endianness, 2) != REQ_EXIF_TAG_MARK) {
    return not_found;
  }

  // Kept in size_t so that advancing it below cannot wrap around.
  size_t offset = get_uint32(exif_data_vec, endianness, 4);
  // The 2-byte entry count must lie inside the buffer.
  if (offset > size || size - offset < 2) {
    return not_found;
  }
  const size_t num_entry = get_uint16(exif_data_vec, endianness, offset);
  offset += 2; // go to start of tag fields

  constexpr size_t tiff_field_size = 12;
  // Bytes of an entry that are actually read: the tag id at +0 and the
  // 16-bit value at +8.
  constexpr size_t bytes_read_per_field = 10;
  for (size_t entry = 0; entry < num_entry; ++entry) {
    // `offset <= size` holds here, so the subtraction cannot underflow.
    if (size - offset < bytes_read_per_field) {
      break; // truncated entry table
    }
    // Here we just search for orientation tag and parse it
    const auto tag_num = get_uint16(exif_data_vec, endianness, offset);
    if (tag_num == INCORRECT_TAG) {
      // Same as before: the sentinel value stops the scan.
      break;
    }
    if (tag_num == ORIENTATION_EXIF_TAG) {
      return get_uint16(exif_data_vec, endianness, offset + 8);
    }
    offset += tiff_field_size;
  }
  return not_found;
}
173+
174+
// Values of the Exif orientation tag, each annotated with the correction
// needed to bring the decoded image upright.
// normal orientation
constexpr uint16_t IMAGE_ORIENTATION_TL = 1;
// needs horizontal flip
constexpr uint16_t IMAGE_ORIENTATION_TR = 2;
// needs 180 rotation
constexpr uint16_t IMAGE_ORIENTATION_BR = 3;
// needs vertical flip
constexpr uint16_t IMAGE_ORIENTATION_BL = 4;
// mirrored horizontal & rotate 270 CW
constexpr uint16_t IMAGE_ORIENTATION_LT = 5;
// rotate 90 CW
constexpr uint16_t IMAGE_ORIENTATION_RT = 6;
// mirrored horizontal & rotate 90 CW
constexpr uint16_t IMAGE_ORIENTATION_RB = 7;
// needs 270 CW rotation
constexpr uint16_t IMAGE_ORIENTATION_LB = 8;
184+
185+
// Applies the flip / transpose combination that corresponds to an Exif
// orientation value to `image`.
//
// The operations act on the last two dimensions of `image` (dims -2 and -1).
// Any `orientation` other than the IMAGE_ORIENTATION_* values 2..8 --
// including IMAGE_ORIENTATION_TL and -1 -- returns `image` as is.
inline torch::Tensor exif_orientation_transform(
    const torch::Tensor& image,
    int orientation) {
  switch (orientation) {
    case IMAGE_ORIENTATION_TR:
      return image.flip(-1);
    case IMAGE_ORIENTATION_BR:
      // a 180 rotation is the same as flipping along both dimensions
      return image.flip({-2, -1});
    case IMAGE_ORIENTATION_BL:
      return image.flip(-2);
    case IMAGE_ORIENTATION_LT:
      return image.transpose(-1, -2);
    case IMAGE_ORIENTATION_RT:
      return image.transpose(-1, -2).flip(-1);
    case IMAGE_ORIENTATION_RB:
      return image.transpose(-1, -2).flip({-2, -1});
    case IMAGE_ORIENTATION_LB:
      return image.transpose(-1, -2).flip(-2);
    case IMAGE_ORIENTATION_TL:
    default:
      return image;
  }
}
209+
210+
} // namespace exif_private
211+
} // namespace image
212+
} // namespace vision

torchvision/csrc/io/image/image.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@ static auto registry =
2323
torch::RegisterOperators()
2424
.op("image::decode_png", &decode_png)
2525
.op("image::encode_png", &encode_png)
26-
.op("image::decode_jpeg", &decode_jpeg)
26+
.op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor",
27+
&decode_jpeg)
2728
.op("image::encode_jpeg", &encode_jpeg)
2829
.op("image::read_file", &read_file)
2930
.op("image::write_file", &write_file)
30-
.op("image::decode_image", &decode_image)
31+
.op("image::decode_image(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor",
32+
&decode_image)
3133
.op("image::decode_jpeg_cuda", &decode_jpeg_cuda)
3234
.op("image::_jpeg_version", &_jpeg_version)
3335
.op("image::_is_compiled_against_turbo", &_is_compiled_against_turbo);

0 commit comments

Comments
 (0)