Skip to content

Commit 3495e80

Browse files
committed
Refactor image parsing in Dockerfile.meipian and chat_utils.py
1 parent 34b0469 commit 3495e80

File tree

2 files changed

+12
-11
lines changed

2 files changed

+12
-11
lines changed

Dockerfile.meipian

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,9 @@ WORKDIR /workspace
44

55
RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple && \
66
pip install redis && \
7+
pip install flash-attn --no-build-isolation && \
78
pip install https://vllm-wheels.s3.us-west-2.amazonaws.com/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
89

9-
10-
# Attention: the image, the wheel, and the branch must all be at the same version
11-
# sync main
12-
# git clone https://github.com/whyiug/vllm
13-
# cd vllm
14-
# git checkout feature_redis_image_embeds
15-
# git merge origin main
16-
1710
COPY . /workspace/vllm
1811

1912
WORKDIR /workspace/vllm

vllm/entrypoints/chat_utils.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ def load_chat_template(
358358
# TODO: Let user specify how to insert multimodal tokens into prompt
359359
# (similar to chat template)
360360
def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],
361-
text_prompt: str) -> str:
361+
text_prompt: str, model_type: str) -> str:
362362
"""Combine multimodal prompts for a multimodal language model."""
363363

364364
# Look through the text prompt to check for missing placeholders
@@ -378,7 +378,13 @@ def _get_full_multimodal_text_prompt(placeholder_counts: Dict[str, int],
378378

379379
# NOTE: For now we always add missing placeholders at the front of
380380
# the prompt. This may change to be customizable in the future.
381-
return "\n".join(missing_placeholders + [text_prompt])
381+
if model_type == "qwen2_vl":
382+
# TODO: multiple images are not yet well supported
383+
multimodal_prompt = "".join(missing_placeholders + [text_prompt])
384+
else:
385+
multimodal_prompt = "\n".join(missing_placeholders + [text_prompt])
386+
387+
return multimodal_prompt
382388

383389

384390
# No need to validate using Pydantic again
@@ -442,7 +448,9 @@ def _parse_chat_message_content_parts(
442448
mm_placeholder_counts = mm_parser.mm_placeholder_counts()
443449
if mm_placeholder_counts:
444450
text_prompt = _get_full_multimodal_text_prompt(
445-
mm_placeholder_counts, text_prompt)
451+
mm_placeholder_counts,
452+
text_prompt,
453+
mm_tracker._model_config.hf_config.model_type)
446454
return [ConversationMessage(role=role, content=text_prompt)]
447455

448456

0 commit comments

Comments
 (0)