|
1 | 1 | # SPDX-License-Identifier: Apache-2.0 |
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
| 3 | +import io |
3 | 4 | from pathlib import Path |
4 | 5 |
|
5 | 6 | import numpy as np |
6 | 7 | import numpy.typing as npt |
| 8 | +import pybase64 |
7 | 9 | import pytest |
8 | 10 | from PIL import Image |
9 | 11 |
|
@@ -235,3 +237,53 @@ def test_video_media_io_backend_env_var_fallback(monkeypatch: pytest.MonkeyPatch |
235 | 237 | frames_missing, metadata_missing = videoio_missing.load_bytes(b"test") |
236 | 238 | np.testing.assert_array_equal(frames_missing, FAKE_OUTPUT_2) |
237 | 239 | assert metadata_missing["video_backend"] == "test_video_backend_override_2" |
| 240 | + |
| 241 | + |
def test_load_base64_jpeg_returns_metadata():
    """Check that load_base64 with video/jpeg yields populated metadata.

    Regression guard: base64 JPEG frame sequences used to come back with an
    empty metadata dict, which broke downstream consumers that rely on
    fields like total_num_frames and fps. See PR #37301.
    """
    num_test_frames = 3
    frame_width, frame_height = 8, 8

    def _encode_frame(index):
        # Render one tiny solid-colour JPEG in memory and base64-encode it.
        picture = Image.new(
            "RGB", (frame_width, frame_height), color=(index * 80, 0, 0)
        )
        stream = io.BytesIO()
        picture.save(stream, format="JPEG")
        return pybase64.b64encode(stream.getvalue()).decode("ascii")

    # The payload passed to load_base64 is the encoded frames joined by commas.
    data = ",".join(_encode_frame(i) for i in range(num_test_frames))

    videoio = VideoMediaIO(ImageMediaIO(), num_frames=num_test_frames)
    frames, metadata = videoio.load_base64("video/jpeg", data)

    # The leading axis of the frames array must equal the frame count.
    assert frames.shape[0] == num_test_frames

    # Every one of these metadata keys has to be present.
    required_keys = {
        "total_num_frames",
        "fps",
        "duration",
        "video_backend",
        "frames_indices",
        "do_sample_frames",
    }
    assert required_keys.issubset(metadata.keys()), (
        f"Missing metadata keys: {required_keys - metadata.keys()}"
    )

    # Spot-check the concrete values reported for the JPEG sequence.
    assert metadata["total_num_frames"] == num_test_frames
    assert metadata["video_backend"] == "jpeg_sequence"
    assert metadata["frames_indices"] == list(range(num_test_frames))
    assert metadata["do_sample_frames"] is False
    # With the default fps of 1, the duration equals the number of frames.
    assert metadata["fps"] == 1.0
    assert metadata["duration"] == float(num_test_frames)
0 commit comments