Skip to content

Commit d23a061

Browse files
committed
Add utility to fetch and prepare ZIM illustration ; make this utility directly accessible from illustration metadata
1 parent 788e4ae commit d23a061

File tree

7 files changed

+178
-10
lines changed

7 files changed

+178
-10
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- Add utility to fetch and process ZIM illustration and make it directly accessible from illustration metadata (#254)
13+
1014
## [5.1.1] - 2025-02-17
1115

1216
### Changed

src/zimscraperlib/constants.py

+3
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,6 @@
2929
# default timeout to get responses from upstream when doing web requests ; this is not
3030
# the total time it takes to download the whole resource
3131
DEFAULT_WEB_REQUESTS_TIMEOUT = 10
32+
33+
DEFAULT_ZIM_ILLLUSTRATION_SIZE = 48
34+
DEFAULT_ZIM_ILLLUSTRATION_SCALE = 1
src/zimscraperlib/image/illustration.py

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import io
2+
import pathlib
3+
4+
from zimscraperlib.constants import DEFAULT_ZIM_ILLLUSTRATION_SIZE
5+
from zimscraperlib.image.conversion import convert_image, convert_svg2png
6+
from zimscraperlib.image.optimization import optimize_png
7+
from zimscraperlib.image.probing import format_for
8+
from zimscraperlib.image.transformation import resize_image
9+
from zimscraperlib.inputs import handle_user_provided_file
10+
11+
12+
def get_zim_illustration(
13+
illustration_location: pathlib.Path | str,
14+
width: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE,
15+
height: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE,
16+
resize_method: str = "contain",
17+
) -> io.BytesIO:
18+
"""Get ZIM-ready illustration from any image path or URL
19+
20+
illustration_location will be downloaded if needed. Image is automatically
21+
converted to PNG, resized and optimized as needed.
22+
23+
Arguments:
24+
illustration_location: path or URL to an image
25+
width: target illustration width
26+
height: target illustration height
27+
resize_method: method to resize the image ; in general only 'contain' or
28+
'cover' make sense, but 'crop', 'width', 'height' and 'thumbnail' can be used
29+
"""
30+
31+
illustration_path = handle_user_provided_file(illustration_location)
32+
33+
if not illustration_path:
34+
# given handle_user_provided_file logic, this is not supposed to happen besides
35+
# when empty string is passed, hence the simple error message
36+
raise ValueError("Illustration is missing")
37+
38+
illustration = io.BytesIO()
39+
illustration_format = format_for(illustration_path, from_suffix=False)
40+
if illustration_format == "SVG":
41+
convert_svg2png(illustration_path, illustration, width, height)
42+
else:
43+
if illustration_format != "PNG":
44+
convert_image(illustration_path, illustration, fmt="PNG")
45+
else:
46+
illustration = io.BytesIO(illustration_path.read_bytes())
47+
resize_image(illustration, width, height, method=resize_method)
48+
49+
optimized_illustration = io.BytesIO()
50+
optimize_png(illustration, optimized_illustration)
51+
52+
return optimized_illustration

src/zimscraperlib/zim/metadata.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import datetime
33
import io
4+
import pathlib
45
from abc import ABC, abstractmethod
56
from collections.abc import Iterable
67
from dataclasses import asdict, dataclass, fields
@@ -10,12 +11,15 @@
1011
import regex
1112

1213
from zimscraperlib.constants import (
14+
DEFAULT_ZIM_ILLLUSTRATION_SCALE,
15+
DEFAULT_ZIM_ILLLUSTRATION_SIZE,
1316
ILLUSTRATIONS_METADATA_RE,
1417
MAXIMUM_DESCRIPTION_METADATA_LENGTH,
1518
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH,
1619
RECOMMENDED_MAX_TITLE_LENGTH,
1720
)
1821
from zimscraperlib.i18n import is_valid_iso_639_3
22+
from zimscraperlib.image.illustration import get_zim_illustration
1923
from zimscraperlib.image.probing import is_valid_image
2024
from zimscraperlib.inputs import unique_values
2125
from zimscraperlib.typing import SupportsRead, SupportsSeekableRead
@@ -352,12 +356,20 @@ class IllustrationBasedMetadata(Metadata):
352356
meta_mimetype = "image/png"
353357

354358
def __init__(
355-
self, value: bytes | io.IOBase | io.BytesIO, name: str | None = None
359+
self,
360+
value: bytes | io.IOBase | io.BytesIO | str | pathlib.Path,
361+
name: str | None = None,
356362
) -> None:
357363
super().__init__(value=value, name=name)
358364

359365
# native type is PNG image buffer
360-
def get_cleaned_value(self, value: bytes | io.IOBase | io.BytesIO) -> bytes:
366+
def get_cleaned_value(
367+
self, value: bytes | io.IOBase | io.BytesIO | str | pathlib.Path
368+
) -> bytes:
369+
if isinstance(value, str | pathlib.Path):
370+
value = get_zim_illustration(
371+
value, self.illustration_size, self.illustration_size
372+
)
361373
value = self.get_binary_from(value)
362374
if not is_valid_image(
363375
image=value,
@@ -413,7 +425,10 @@ class IllustrationMetadata(IllustrationBasedMetadata):
413425
illustration_scale: int = 1
414426

415427
def __init__(
416-
self, value: bytes | io.IOBase | io.BytesIO, size: int, scale: int = 1
428+
self,
429+
value: bytes | io.IOBase | io.BytesIO | str | pathlib.Path,
430+
size: int,
431+
scale: int = 1,
417432
) -> None:
418433
self.illustration_scale = scale
419434
self.illustration_size = size
@@ -423,8 +438,8 @@ def __init__(
423438
@mandatory
424439
class DefaultIllustrationMetadata(IllustrationBasedMetadata):
425440
meta_name = "Illustration_48x48@1"
426-
illustration_size: int = 48
427-
illustration_scale: int = 1
441+
illustration_size: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE
442+
illustration_scale: int = DEFAULT_ZIM_ILLLUSTRATION_SCALE
428443

429444

430445
@mandatory

tests/image/test_illustration.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
from PIL.Image import open as pilopen
5+
6+
from zimscraperlib.image.illustration import get_zim_illustration
7+
8+
COMMONS_IMAGE_PATH = (Path(__file__) / "../../files/commons.png").resolve()
9+
COMMONS_48_IMAGE_PATH = (Path(__file__) / "../../files/commons48.png").resolve()
10+
NINJA_IMAGE_PATH = (Path(__file__) / "../../files/ninja.webp").resolve()
11+
12+
13+
@pytest.mark.parametrize(
14+
"user_illustration, expected_max_filesize",
15+
[
16+
pytest.param(COMMONS_IMAGE_PATH, 5000, id="big_commons"),
17+
pytest.param(COMMONS_48_IMAGE_PATH, 4000, id="small_commons"),
18+
pytest.param(NINJA_IMAGE_PATH, 5000, id="ninja"),
19+
pytest.param(
20+
"https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Commons-logo.svg/250px-Commons-logo.svg.png",
21+
4000,
22+
id="png_url",
23+
),
24+
pytest.param(
25+
"https://upload.wikimedia.org/wikipedia/commons/4/4a/Commons-logo.svg",
26+
4000,
27+
id="svg_url",
28+
),
29+
],
30+
)
31+
def test_get_zim_illustration(
32+
user_illustration: str | Path,
33+
expected_max_filesize: int,
34+
):
35+
image = get_zim_illustration(user_illustration)
36+
assert len(image.getvalue()) < expected_max_filesize
37+
with pilopen(image) as image_details:
38+
assert image_details.format == "PNG"
39+
assert image_details.size == (48, 48)
40+
41+
42+
def test_get_missing_user_zim_illustration():
43+
with pytest.raises(Exception, match="missing.png could not be found"):
44+
get_zim_illustration("./missing.png")
45+
46+
47+
def test_get_missing_default_zim_illustration():
48+
with pytest.raises(Exception, match="Illustration is missing"):
49+
get_zim_illustration("")
50+
51+
52+
def test_get_zim_illustration_custom_size():
53+
image = get_zim_illustration(NINJA_IMAGE_PATH, 96, 120)
54+
assert len(image.getvalue()) < 21000
55+
with pilopen(image) as image_details:
56+
assert image_details.format == "PNG"
57+
assert image_details.size == (96, 120)
58+
59+
60+
def test_get_zim_illustration_method():
61+
image_cover = get_zim_illustration(NINJA_IMAGE_PATH, resize_method="cover")
62+
image_contain = get_zim_illustration(NINJA_IMAGE_PATH, resize_method="contain")
63+
# cover image is always bigger than contain image since more pixels are
64+
# "used/non-transparent"
65+
assert len(image_cover.getvalue()) > len(image_contain.getvalue())
66+
for image in [image_cover, image_contain]:
67+
with pilopen(image) as image_details:
68+
assert image_details.format == "PNG"
69+
assert image_details.size == (48, 48)

tests/zim/test_metadata.py

+22
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import pytest
1212
from beartype.roar import BeartypeCallHintParamViolation as InvalidType
13+
from PIL.Image import open as pilopen
1314

1415
from zimscraperlib.zim import metadata
1516

@@ -213,6 +214,11 @@ def test_validate_illustration_invalid_image():
213214
metadata.IllustrationMetadata(b"PN", size=48)
214215

215216

217+
def test_validate_illustration_invalid_image_location():
218+
with pytest.raises(OSError, match="missing.png could not be found"):
219+
metadata.IllustrationMetadata("missing.png", size=48)
220+
221+
216222
def test_validate_illustration_wrong_sizes(png_image2: pathlib.Path):
217223
with open(png_image2, "rb") as fh:
218224
png_data = fh.read()
@@ -222,6 +228,22 @@ def test_validate_illustration_wrong_sizes(png_image2: pathlib.Path):
222228
metadata.IllustrationMetadata(png_data, size=48)
223229

224230

231+
def test_validate_illustration_path_resized(png_image2: pathlib.Path):
232+
with pilopen(
233+
io.BytesIO(metadata.IllustrationMetadata(png_image2, size=32).libzim_value)
234+
) as image:
235+
assert image.size == (32, 32)
236+
237+
238+
def test_validate_illustration_str_resized(png_image2: pathlib.Path):
239+
with pilopen(
240+
io.BytesIO(
241+
metadata.IllustrationMetadata(png_image2.resolve(), size=48).libzim_value
242+
)
243+
) as image:
244+
assert image.size == (48, 48)
245+
246+
225247
def test_blank_metadata():
226248
with pytest.raises(ValueError, match=r"Missing value \(empty not allowed\)"):
227249
metadata.Metadata(name="Blank", value=b"")

tests/zim/test_zim_creator.py

+8-5
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import pytest
1616
from libzim.writer import Compression # pyright: ignore[reportMissingModuleSource]
17+
from PIL.Image import open as pilopen
1718

1819
from zimscraperlib.constants import UTF8
1920
from zimscraperlib.download import save_large_file, stream_file
@@ -756,11 +757,9 @@ def test_relax_metadata(
756757
],
757758
)
758759
def test_config_metadata(
759-
tmp_path: pathlib.Path, png_image: pathlib.Path, tags: str | list[str]
760+
tmp_path: pathlib.Path, png_image2: pathlib.Path, tags: str | list[str]
760761
):
761762
fpath = tmp_path / "test_config.zim"
762-
with open(png_image, "rb") as fh:
763-
png_data = fh.read()
764763
creator = Creator(fpath, "").config_metadata(
765764
StandardMetadataList(
766765
Name=NameMetadata("wikipedia_fr_football"),
@@ -781,7 +780,7 @@ def test_config_metadata(
781780
Flavour=FlavourMetadata("nopic"),
782781
Source=SourceMetadata("https://en.wikipedia.org/"),
783782
Scraper=ScraperMetadata("mwoffliner 1.2.3"),
784-
Illustration_48x48_at_1=DefaultIllustrationMetadata(png_data),
783+
Illustration_48x48_at_1=DefaultIllustrationMetadata(png_image2),
785784
),
786785
[CustomTextMetadata("X-TestMetadata", "Test Metadata")],
787786
)
@@ -817,7 +816,11 @@ def test_config_metadata(
817816
assert reader.get_text_metadata("Flavour") == "nopic"
818817
assert reader.get_text_metadata("Source") == "https://en.wikipedia.org/"
819818
assert reader.get_text_metadata("Scraper") == "mwoffliner 1.2.3"
820-
assert reader.get_metadata("Illustration_48x48@1") == png_data
819+
with pilopen(
820+
io.BytesIO(reader.get_metadata("Illustration_48x48@1"))
821+
) as illustration:
822+
assert illustration.size == (48, 48)
823+
assert illustration.format == "PNG"
821824
assert reader.get_text_metadata("X-TestMetadata") == "Test Metadata"
822825

823826

0 commit comments

Comments
 (0)