Skip to content

Add utility to fetch and prepare ZIM illustration ; make this utility directly accessible from illustration metadata #260

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Upgrade to wombat 3.8.11 (#256)

### Added

- Add utility to fetch and prepare ZIM illustration; make this utility directly accessible from illustration metadata (#254)

## [5.1.1] - 2025-02-17

### Changed
Expand Down
3 changes: 3 additions & 0 deletions src/zimscraperlib/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,6 @@
# default timeout to get responses from upstream when doing web requests ; this is not
# the total time it gets to download the whole resource
DEFAULT_WEB_REQUESTS_TIMEOUT = 10

# default width and height (in pixels) of a ZIM illustration
DEFAULT_ZIM_ILLUSTRATION_SIZE = 48
# default scale of a ZIM illustration (the "@1" of "Illustration_48x48@1")
DEFAULT_ZIM_ILLUSTRATION_SCALE = 1

# misspelled names (three "L") kept as aliases so that existing imports keep working
DEFAULT_ZIM_ILLLUSTRATION_SIZE = DEFAULT_ZIM_ILLUSTRATION_SIZE
DEFAULT_ZIM_ILLLUSTRATION_SCALE = DEFAULT_ZIM_ILLUSTRATION_SCALE
52 changes: 52 additions & 0 deletions src/zimscraperlib/image/illustration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import io
import pathlib

from zimscraperlib.constants import DEFAULT_ZIM_ILLLUSTRATION_SIZE
from zimscraperlib.image.conversion import convert_image, convert_svg2png
from zimscraperlib.image.optimization import optimize_png
from zimscraperlib.image.probing import format_for
from zimscraperlib.image.transformation import resize_image
from zimscraperlib.inputs import handle_user_provided_file


def get_zim_illustration(
    illustration_location: pathlib.Path | str,
    width: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE,
    height: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE,
    resize_method: str = "contain",
) -> io.BytesIO:
    """Build a ZIM-ready PNG illustration from a local image path or a URL

    The source is obtained through handle_user_provided_file, turned into a PNG
    when it is not one already, brought to the requested dimensions and finally
    optimized.

    Arguments:
        illustration_location: path or URL to an image
        width: target illustration width
        height: target illustration height
        resize_method: method to resize the image ; in general only 'contain' or
            'cover' make sense, but 'crop', 'width', 'height' and 'thumbnail' can
            be used. Not used for SVG sources: those are converted with the
            target width and height and are not passed to resize_image.

    Returns:
        in-memory buffer holding the optimized PNG

    Raises:
        ValueError: no file could be obtained from illustration_location
    """

    source_path = handle_user_provided_file(illustration_location)
    if not source_path:
        # given handle_user_provided_file logic, this is not supposed to happen
        # besides when empty string is passed, hence the simple error message
        raise ValueError("Illustration is missing")

    # format is probed from the file content, not guessed from its suffix
    source_format = format_for(source_path, from_suffix=False)

    if source_format == "SVG":
        png_buffer = io.BytesIO()
        convert_svg2png(source_path, png_buffer, width, height)
    else:
        if source_format == "PNG":
            # already a PNG: work on an in-memory copy of the file
            png_buffer = io.BytesIO(source_path.read_bytes())
        else:
            png_buffer = io.BytesIO()
            convert_image(source_path, png_buffer, fmt="PNG")
        resize_image(png_buffer, width, height, method=resize_method)

    result = io.BytesIO()
    optimize_png(png_buffer, result)
    return result
25 changes: 20 additions & 5 deletions src/zimscraperlib/zim/metadata.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import base64
import datetime
import io
import pathlib
from abc import ABC, abstractmethod
from collections.abc import Iterable
from dataclasses import asdict, dataclass, fields
Expand All @@ -10,12 +11,15 @@
import regex

from zimscraperlib.constants import (
DEFAULT_ZIM_ILLLUSTRATION_SCALE,
DEFAULT_ZIM_ILLLUSTRATION_SIZE,
ILLUSTRATIONS_METADATA_RE,
MAXIMUM_DESCRIPTION_METADATA_LENGTH,
MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH,
RECOMMENDED_MAX_TITLE_LENGTH,
)
from zimscraperlib.i18n import is_valid_iso_639_3
from zimscraperlib.image.illustration import get_zim_illustration
from zimscraperlib.image.probing import is_valid_image
from zimscraperlib.inputs import unique_values
from zimscraperlib.typing import SupportsRead, SupportsSeekableRead
Expand Down Expand Up @@ -352,12 +356,20 @@ class IllustrationBasedMetadata(Metadata):
meta_mimetype = "image/png"

def __init__(
self, value: bytes | io.IOBase | io.BytesIO, name: str | None = None
self,
value: bytes | io.IOBase | io.BytesIO | str | pathlib.Path,
name: str | None = None,
) -> None:
super().__init__(value=value, name=name)

# native type is PNG image buffer
def get_cleaned_value(self, value: bytes | io.IOBase | io.BytesIO) -> bytes:
def get_cleaned_value(
self, value: bytes | io.IOBase | io.BytesIO | str | pathlib.Path
) -> bytes:
if isinstance(value, str | pathlib.Path):
value = get_zim_illustration(
value, self.illustration_size, self.illustration_size
)
value = self.get_binary_from(value)
if not is_valid_image(
image=value,
Expand Down Expand Up @@ -413,7 +425,10 @@ class IllustrationMetadata(IllustrationBasedMetadata):
illustration_scale: int = 1

def __init__(
self, value: bytes | io.IOBase | io.BytesIO, size: int, scale: int = 1
self,
value: bytes | io.IOBase | io.BytesIO | str | pathlib.Path,
size: int,
scale: int = 1,
) -> None:
self.illustration_scale = scale
self.illustration_size = size
Expand All @@ -423,8 +438,8 @@ def __init__(
@mandatory
class DefaultIllustrationMetadata(IllustrationBasedMetadata):
meta_name = "Illustration_48x48@1"
illustration_size: int = 48
illustration_scale: int = 1
illustration_size: int = DEFAULT_ZIM_ILLLUSTRATION_SIZE
illustration_scale: int = DEFAULT_ZIM_ILLLUSTRATION_SCALE


@mandatory
Expand Down
69 changes: 69 additions & 0 deletions tests/image/test_illustration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from pathlib import Path

import pytest
from PIL.Image import open as pilopen

from zimscraperlib.image.illustration import get_zim_illustration

def _fixture_path(filename: str) -> Path:
    """Absolute path of a fixture stored in the `files` folder, one level above
    this module's directory"""
    return (Path(__file__) / "../.." / "files" / filename).resolve()


COMMONS_IMAGE_PATH = _fixture_path("commons.png")
COMMONS_48_IMAGE_PATH = _fixture_path("commons48.png")
NINJA_IMAGE_PATH = _fixture_path("ninja.webp")


@pytest.mark.parametrize(
    ("user_illustration", "expected_max_filesize"),
    [
        (COMMONS_IMAGE_PATH, 5000),
        (COMMONS_48_IMAGE_PATH, 4000),
        (NINJA_IMAGE_PATH, 5000),
        (
            "https://upload.wikimedia.org/wikipedia/commons/thumb/4/4a/Commons-logo.svg/250px-Commons-logo.svg.png",
            4000,
        ),
        (
            "https://upload.wikimedia.org/wikipedia/commons/4/4a/Commons-logo.svg",
            4000,
        ),
    ],
    ids=["big_commons", "small_commons", "ninja", "png_url", "svg_url"],
)
def test_get_zim_illustration(
    user_illustration: str | Path,
    expected_max_filesize: int,
):
    """Local files and URLs all end up as a 48x48 PNG below the expected size"""
    png_buffer = get_zim_illustration(user_illustration)
    assert len(png_buffer.getvalue()) < expected_max_filesize
    with pilopen(png_buffer) as opened:
        assert opened.format == "PNG"
        assert opened.size == (48, 48)


def test_get_missing_user_zim_illustration():
    """A local path which does not exist is reported as not found"""
    # expect the specific OSError rather than any Exception, so that an unrelated
    # failure cannot make this test pass ; the dot is escaped since `match` is a
    # regular expression
    with pytest.raises(OSError, match=r"missing\.png could not be found"):
        get_zim_illustration("./missing.png")


def test_get_missing_default_zim_illustration():
    """An empty location is rejected with the dedicated ValueError"""
    # get_zim_illustration raises ValueError("Illustration is missing") here ;
    # expecting the exact type avoids passing on an unrelated exception
    with pytest.raises(ValueError, match="Illustration is missing"):
        get_zim_illustration("")


def test_get_zim_illustration_custom_size():
    """Explicit width and height are honored, including non-square ones"""
    target_width, target_height = 96, 120
    png_buffer = get_zim_illustration(NINJA_IMAGE_PATH, target_width, target_height)
    assert len(png_buffer.getvalue()) < 21000
    with pilopen(png_buffer) as opened:
        assert opened.format == "PNG"
        assert opened.size == (target_width, target_height)


def test_get_zim_illustration_method():
    """'cover' and 'contain' both give a 48x48 PNG, 'cover' being the heavier one"""
    covered = get_zim_illustration(NINJA_IMAGE_PATH, resize_method="cover")
    contained = get_zim_illustration(NINJA_IMAGE_PATH, resize_method="contain")
    # cover image is always bigger than contain image since more pixels are
    # "used/non-transparent"
    assert len(covered.getvalue()) > len(contained.getvalue())
    for png_buffer in (covered, contained):
        with pilopen(png_buffer) as opened:
            assert opened.format == "PNG"
            assert opened.size == (48, 48)
22 changes: 22 additions & 0 deletions tests/zim/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import pytest
from beartype.roar import BeartypeCallHintParamViolation as InvalidType
from PIL.Image import open as pilopen

from zimscraperlib.zim import metadata

Expand Down Expand Up @@ -213,6 +214,11 @@ def test_validate_illustration_invalid_image():
metadata.IllustrationMetadata(b"PN", size=48)


def test_validate_illustration_invalid_image_location():
    """A string pointing to a file which does not exist raises OSError"""
    # `match` is a regular expression: escape the dot so that it only matches a
    # literal "." in the file name
    with pytest.raises(OSError, match=r"missing\.png could not be found"):
        metadata.IllustrationMetadata("missing.png", size=48)


def test_validate_illustration_wrong_sizes(png_image2: pathlib.Path):
with open(png_image2, "rb") as fh:
png_data = fh.read()
Expand All @@ -222,6 +228,22 @@ def test_validate_illustration_wrong_sizes(png_image2: pathlib.Path):
metadata.IllustrationMetadata(png_data, size=48)


def test_validate_illustration_path_resized(png_image2: pathlib.Path):
    """An illustration given as a Path is resized to the requested size"""
    illustration = metadata.IllustrationMetadata(png_image2, size=32)
    png_buffer = io.BytesIO(illustration.libzim_value)
    with pilopen(png_buffer) as image:
        assert image.size == (32, 32)


def test_validate_illustration_str_resized(png_image2: pathlib.Path):
    """An illustration given as a str location is resized to the requested size"""
    # pass an actual str: `Path.resolve()` alone still returns a Path, which would
    # leave the str input of IllustrationMetadata untested
    location = str(png_image2.resolve())
    with pilopen(
        io.BytesIO(metadata.IllustrationMetadata(location, size=48).libzim_value)
    ) as image:
        assert image.size == (48, 48)


def test_blank_metadata():
with pytest.raises(ValueError, match=r"Missing value \(empty not allowed\)"):
metadata.Metadata(name="Blank", value=b"")
Expand Down
13 changes: 8 additions & 5 deletions tests/zim/test_zim_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import pytest
from libzim.writer import Compression # pyright: ignore[reportMissingModuleSource]
from PIL.Image import open as pilopen

from zimscraperlib.constants import UTF8
from zimscraperlib.download import save_large_file, stream_file
Expand Down Expand Up @@ -756,11 +757,9 @@ def test_relax_metadata(
],
)
def test_config_metadata(
tmp_path: pathlib.Path, png_image: pathlib.Path, tags: str | list[str]
tmp_path: pathlib.Path, png_image2: pathlib.Path, tags: str | list[str]
):
fpath = tmp_path / "test_config.zim"
with open(png_image, "rb") as fh:
png_data = fh.read()
creator = Creator(fpath, "").config_metadata(
StandardMetadataList(
Name=NameMetadata("wikipedia_fr_football"),
Expand All @@ -781,7 +780,7 @@ def test_config_metadata(
Flavour=FlavourMetadata("nopic"),
Source=SourceMetadata("https://en.wikipedia.org/"),
Scraper=ScraperMetadata("mwoffliner 1.2.3"),
Illustration_48x48_at_1=DefaultIllustrationMetadata(png_data),
Illustration_48x48_at_1=DefaultIllustrationMetadata(png_image2),
),
[CustomTextMetadata("X-TestMetadata", "Test Metadata")],
)
Expand Down Expand Up @@ -817,7 +816,11 @@ def test_config_metadata(
assert reader.get_text_metadata("Flavour") == "nopic"
assert reader.get_text_metadata("Source") == "https://en.wikipedia.org/"
assert reader.get_text_metadata("Scraper") == "mwoffliner 1.2.3"
assert reader.get_metadata("Illustration_48x48@1") == png_data
with pilopen(
io.BytesIO(reader.get_metadata("Illustration_48x48@1"))
) as illustration:
assert illustration.size == (48, 48)
assert illustration.format == "PNG"
assert reader.get_text_metadata("X-TestMetadata") == "Test Metadata"


Expand Down