Skip to content

Commit 8aedab4

Browse files
he-yufeng, Isotr0py
authored and committed
[Bugfix] Fix base64 JPEG video frames returning empty metadata (vllm-project#37301)
Signed-off-by: Yufeng He <40085740+universeplayer@users.noreply.github.com> Signed-off-by: Yufeng He <40085740+he-yufeng@users.noreply.github.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Yufeng He <40085740+universeplayer@users.noreply.github.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
1 parent 1687b30 commit 8aedab4

2 files changed

Lines changed: 66 additions & 2 deletions

File tree

tests/multimodal/media/test_video.py

Lines changed: 52 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,11 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import io
34
from pathlib import Path
45

56
import numpy as np
67
import numpy.typing as npt
8+
import pybase64
79
import pytest
810
from PIL import Image
911

@@ -235,3 +237,53 @@ def test_video_media_io_backend_env_var_fallback(monkeypatch: pytest.MonkeyPatch
235237
frames_missing, metadata_missing = videoio_missing.load_bytes(b"test")
236238
np.testing.assert_array_equal(frames_missing, FAKE_OUTPUT_2)
237239
assert metadata_missing["video_backend"] == "test_video_backend_override_2"
240+
241+
242+
def test_load_base64_jpeg_returns_metadata():
    """Check that ``load_base64("video/jpeg", ...)`` yields populated metadata.

    Regression guard for PR #37301: a comma-separated sequence of
    base64-encoded JPEG frames used to come back with an empty metadata
    dict, which broke downstream consumers that read fields such as
    ``total_num_frames`` and ``fps``.
    """
    num_test_frames = 3
    frame_width, frame_height = 8, 8

    def encode_frame(index):
        # Render one tiny solid-colour JPEG in memory and base64-encode it.
        picture = Image.new(
            "RGB", (frame_width, frame_height), color=(index * 80, 0, 0)
        )
        stream = io.BytesIO()
        picture.save(stream, format="JPEG")
        return pybase64.b64encode(stream.getvalue()).decode("ascii")

    # The payload is the individual base64 frames joined with commas.
    data = ",".join(encode_frame(index) for index in range(num_test_frames))

    videoio = VideoMediaIO(ImageMediaIO(), num_frames=num_test_frames)
    frames, metadata = videoio.load_base64("video/jpeg", data)

    # The leading axis of the stacked array counts the frames.
    assert frames.shape[0] == num_test_frames

    # Every key that consumers depend on has to be present.
    required_keys = {
        "total_num_frames",
        "fps",
        "duration",
        "video_backend",
        "frames_indices",
        "do_sample_frames",
    }
    assert required_keys.issubset(metadata.keys()), (
        f"Missing metadata keys: {required_keys - metadata.keys()}"
    )

    # No fps is passed to VideoMediaIO here, so the expected fps is 1.0 and
    # the duration equals the frame count.
    expected_values = {
        "total_num_frames": num_test_frames,
        "video_backend": "jpeg_sequence",
        "frames_indices": list(range(num_test_frames)),
        "fps": 1.0,
        "duration": float(num_test_frames),
    }
    for key, expected in expected_values.items():
        assert metadata[key] == expected

    # Identity check on purpose: the flag must be the literal ``False``.
    assert metadata["do_sample_frames"] is False

vllm/multimodal/media/video.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,21 @@ def load_base64(
8080
"image/jpeg",
8181
)
8282

83-
return np.stack(
83+
frames = np.stack(
8484
[np.asarray(load_frame(frame_data)) for frame_data in data.split(",")]
85-
), {}
85+
)
86+
total = int(frames.shape[0])
87+
fps = float(self.kwargs.get("fps", 1))
88+
duration = total / fps if fps > 0 else 0.0
89+
metadata = {
90+
"total_num_frames": total,
91+
"fps": fps,
92+
"duration": duration,
93+
"video_backend": "jpeg_sequence",
94+
"frames_indices": list(range(total)),
95+
"do_sample_frames": False,
96+
}
97+
return frames, metadata
8698

8799
return self.load_bytes(pybase64.b64decode(data))
88100

0 commit comments

Comments
 (0)