Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from typing import TYPE_CHECKING, List, Optional, Union

import numpy as np

from ...feature_extraction_utils import BatchFeature
from ...image_processing_utils import select_best_resolution
from ...image_utils import ImageInput, VideoInput, get_image_size, to_numpy_array
Expand Down Expand Up @@ -193,7 +195,11 @@ def __call__(

# videos are easier, simply get frames and multiply
if videos_inputs:
one_video = to_numpy_array(videos_inputs.get("pixel_values_videos")[0])
one_video = videos_inputs.get("pixel_values_videos")[0]
if isinstance(one_video, (list, tuple)):
one_video = np.array(one_video)
else:
one_video = to_numpy_array(one_video)
height, width = get_image_size(one_video[0])
num_frames = one_video.shape[0] # frame dim is always after batch dim

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import os
from typing import Iterable, List, Union

import numpy as np

from ...feature_extraction_utils import BatchFeature
from ...image_processing_utils import select_best_resolution
from ...image_utils import ImageInput, VideoInput, get_image_size, to_numpy_array
Expand Down Expand Up @@ -164,7 +166,11 @@ def __call__(
if videos is not None:
video_inputs = self.video_processor(videos, **output_kwargs["videos_kwargs"])

one_video = to_numpy_array(video_inputs.get("pixel_values_videos")[0])
one_video = video_inputs.get("pixel_values_videos")[0]
if isinstance(video_inputs.get("pixel_values_videos")[0], (list, tuple)):
one_video = np.array(one_video)
else:
one_video = to_numpy_array(one_video)
height, width = get_image_size(one_video[0], channel_dim=output_kwargs["images_kwargs"].get("data_format"))
num_frames = one_video.shape[0] # frame dim is always after batch dim
patches_height_width = int(math.sqrt(self.num_image_tokens))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from typing import List, Optional, Union

import numpy as np

from ...feature_extraction_utils import BatchFeature
from ...image_utils import ImageInput, get_image_size, to_numpy_array
from ...processing_utils import ProcessorMixin
Expand Down Expand Up @@ -165,7 +167,11 @@ def __call__(
num_frames = 1

if "pixel_values_videos" in encoded_images.keys():
one_video = to_numpy_array(encoded_images.get("pixel_values_videos")[0])
one_video = encoded_images.get("pixel_values_videos")[0]
if isinstance(encoded_images.get("pixel_values_videos")[0], (list, tuple)):
one_video = np.array(one_video)
else:
one_video = to_numpy_array(one_video)
height, width = get_image_size(one_video[0])
num_frames = one_video.shape[0] # frame dim is always after batch dim

Expand Down