tests/models/test_phi3v.py (5 changes: 4 additions & 1 deletion)
@@ -81,7 +81,10 @@ def run_test(
 
     inputs_per_image = [(
         [prompt for _ in size_factors],
-        [rescale_image_size(image, factor) for factor in size_factors],
+        [
+            rescale_image_size(image, factor, transpose=idx)
+            for idx, factor in enumerate(size_factors)
+        ],
     ) for image, prompt in zip(images, HF_IMAGE_PROMPTS)]
 
     # NOTE: take care of the order. run vLLM first, and then run HF.
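
Note (not part of the diff): a minimal sketch of what the rewritten comprehension exercises. Each size factor is paired with its index, and the index is forwarded as the transpose argument, so every rescaled test image also gets a distinct PIL orientation. The size_factors values and the image below are illustrative, not the ones pinned by the test.

from PIL import Image

from vllm.multimodal.utils import rescale_image_size

size_factors = [0.25, 0.5, 1.0, 1.5]  # illustrative only
image = Image.new("RGB", (640, 480))  # placeholder image

rescaled = [
    rescale_image_size(image, factor, transpose=idx)
    for idx, factor in enumerate(size_factors)
]
# transpose=0 -> FLIP_LEFT_RIGHT, 1 -> FLIP_TOP_BOTTOM, 2 -> ROTATE_90, ...
print([img.size for img in rescaled])
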
vllm/model_executor/models/phi3v.py (26 changes: 12 additions & 14 deletions)
@@ -189,7 +189,7 @@ def hd_feature_transform(self, image_features, image_sizes):
         global_image_features_hd_newline = self.add_image_newline(
             global_image_features_hd)
 
-        all_image_embeddings = []
+        batch_image_features_proj = []
         # need a for loop to process each image because of different image sizes
         # (patch arrangement is different for each image)
        for i, img_size in enumerate(image_sizes):
@@ -207,19 +207,17 @@ def hd_feature_transform(self, image_features, image_sizes):
                 sub_image_features_hd)
 
             # [sub features, separator, global features]
-            all_image_embeddings.append(
-                torch.cat([
-                    sub_image_features_hd_newline.squeeze(
-                        0),  # (h_crop*12*(w_crop*12+1), 4096)
-                    self.glb_GN.squeeze(0),
-                    global_image_features_hd_newline[i],
-                ]))
-
-        image_features_proj = self.img_projection(
-            torch.stack(all_image_embeddings).to(target_device, target_dtype)
-        )  # (num_images, (h_crop*12*(w_crop*12+1)+1), hidden_size)
-
-        return image_features_proj
+            image_embeddings = torch.cat([
+                sub_image_features_hd_newline.squeeze(
+                    0),  # (h_crop*12*(w_crop*12+1), 4096)
+                self.glb_GN.squeeze(0),
+                global_image_features_hd_newline[i],
+            ])
+            img_proj = self.img_projection(
+                image_embeddings.to(target_device, target_dtype))
+            batch_image_features_proj.append(img_proj)
+
+        return batch_image_features_proj
 
     def reshape_hd_patches_2x2merge(self, image_features, h_crop, w_crop):
         """
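
Note (not part of the diff): a standalone sketch, with assumed shapes and a stand-in projector, of why the projection moved inside the per-image loop. Images with different crop grids produce embeddings with different sequence lengths, so torch.stack on the per-image tensors would fail; projecting each image separately and returning a list of tensors handles mixed sizes.

import torch
import torch.nn as nn

# Stand-in for img_projection; the real module maps the 4096-dim features
# to the language model's hidden size.
img_projection = nn.Linear(4096, 4096)

# Two images with different crop grids -> different token counts (illustrative).
per_image_embeddings = [
    torch.randn(157, 4096),
    torch.randn(613, 4096),
]

# torch.stack(per_image_embeddings) would raise: (157, 4096) vs (613, 4096).
batch_image_features_proj = [img_projection(e) for e in per_image_embeddings]
print([t.shape for t in batch_image_features_proj])
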
vllm/multimodal/utils.py (9 changes: 7 additions & 2 deletions)
@@ -90,8 +90,13 @@ def load_image_from_base64(image: Union[bytes, str]) -> Image.Image:
     return _load_image_from_bytes(base64.b64decode(image))
 
 
-def rescale_image_size(image: Image.Image, size_factor: float) -> Image.Image:
+def rescale_image_size(image: Image.Image,
+                       size_factor: float,
+                       transpose: int = -1) -> Image.Image:
     """Rescale the dimensions of an image by a constant factor."""
     new_width = int(image.width * size_factor)
     new_height = int(image.height * size_factor)
-    return image.resize((new_width, new_height))
+    image = image.resize((new_width, new_height))
+    if transpose >= 0:
+        image = image.transpose(Image.Transpose(transpose))
+    return image
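
Note (not part of the diff): a short usage sketch of the extended helper. The default transpose=-1 keeps the previous behaviour; any non-negative value is interpreted as a member of PIL's Image.Transpose enum.

from PIL import Image

from vllm.multimodal.utils import rescale_image_size

img = Image.new("RGB", (200, 100))  # placeholder image

print(rescale_image_size(img, 0.5).size)               # (100, 50): resize only
print(rescale_image_size(img, 0.5, transpose=2).size)  # (50, 100): ROTATE_90 swaps width and height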