Skip to content

Commit 7681375

Browse files
authored
[BugFix] PaddleOCR-VL fix FD_DEBUG type and support v1 loader (#4605)
* [Bug Fix] PaddleOCRVL fix FD_DEBUG type and support HF model * fix bug * fix bug * fix bug
1 parent 6dcf5a3 commit 7681375

File tree

14 files changed

+126
-62
lines changed

14 files changed

+126
-62
lines changed

docs/usage/environment_variables.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
1616

1717
# Enable debug mode (0 or 1)
1818
"FD_DEBUG":
19-
lambda: os.getenv("FD_DEBUG", "0"),
19+
lambda: int(os.getenv("FD_DEBUG", "0")),
2020

2121
# FastDeploy log retention days
2222
"FD_LOG_BACKUP_COUNT":

docs/zh/usage/environment_variables.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
1616

1717
# 是否启用调试模式,可设置为 0 或 1
1818
"FD_DEBUG":
19-
lambda: os.getenv("FD_DEBUG", "0"),
19+
lambda: int(os.getenv("FD_DEBUG", "0")),
2020

2121
# FastDeploy 日志保留天数
2222
"FD_LOG_BACKUP_COUNT":

fastdeploy/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from fastdeploy.entrypoints.llm import LLM
3131
from fastdeploy.utils import current_package_version, envs
3232

33-
if envs.FD_DEBUG != "1":
33+
if envs.FD_DEBUG != 1:
3434
import logging
3535

3636
pf_logger.logger.setLevel(logging.INFO)

fastdeploy/engine/args_utils.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import argparse
1818
import json
19+
import os
1920
from dataclasses import asdict, dataclass
2021
from dataclasses import fields as dataclass_fields
2122
from typing import Any, Dict, List, Optional, Union
@@ -54,6 +55,16 @@ def nullable_str(x: str) -> Optional[str]:
5455
return x if x else None
5556

5657

58+
def get_model_architecture(model: str, model_config_name: Optional[str] = "config.json") -> Optional[str]:
    """Return the first architecture name declared in a model's config file.

    Args:
        model: Path to a local model directory. Any other string (for
            example a model name that is not a local directory) is accepted
            and simply returned unchanged when no config file is found.
        model_config_name: File name of the JSON config inside ``model``.
            ``None`` is treated as the default ``"config.json"``.

    Returns:
        The first entry of the config's ``"architectures"`` list, or
        ``model`` itself when the config file does not exist or does not
        declare a usable architecture. The result is therefore always a
        string, so callers can safely run substring checks on it.

    Raises:
        json.JSONDecodeError: If the config file exists but is not valid JSON.
    """
    config_path = os.path.join(model, model_config_name or "config.json")
    # isfile (rather than exists) so a directory with that name is treated as
    # "no config" instead of failing inside open().
    if not os.path.isfile(config_path):
        return model

    # Context manager closes the handle even if JSON parsing fails.
    with open(config_path, "r", encoding="utf-8") as config_file:
        model_config = json.load(config_file)

    architectures = model_config.get("architectures") if isinstance(model_config, dict) else None
    if isinstance(architectures, (list, tuple)) and architectures and isinstance(architectures[0], str):
        return architectures[0]
    # No architecture declared: fall back exactly like the missing-file case.
    return model
66+
67+
5768
@dataclass
5869
class EngineArgs:
5970
# Model configuration parameters
@@ -440,6 +451,9 @@ def __post_init__(self):
440451
if self.guided_decoding_backend != "off":
441452
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
442453

454+
if "PaddleOCR" in get_model_architecture(self.model, self.model_config_name):
455+
envs.FD_ENABLE_MAX_PREFILL = 1
456+
443457
@staticmethod
444458
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
445459
"""

fastdeploy/engine/sched/resource_manager_v1.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,6 @@ def _get_num_new_tokens(self, request, token_budget):
340340
if not self.config.model_config.enable_mm:
341341
return num_new_tokens
342342

343-
request.with_image = False
344343
inputs = request.multimodal_inputs
345344
if inputs.get("patch_idx", None) is not None and inputs.get("patch_map", None) is not None:
346345
pre_end_idx = request.num_computed_tokens

fastdeploy/envs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
# Log directory.
2828
"FD_LOG_DIR": lambda: os.getenv("FD_LOG_DIR", "log"),
2929
# Whether to use debug mode, can set 0 or 1
30-
"FD_DEBUG": lambda: os.getenv("FD_DEBUG", "0"),
30+
"FD_DEBUG": lambda: int(os.getenv("FD_DEBUG", "0")),
3131
# Number of days to keep fastdeploy logs.
3232
"FD_LOG_BACKUP_COUNT": lambda: os.getenv("FD_LOG_BACKUP_COUNT", "7"),
3333
# Model download source, can set "AISTUDIO", "MODELSCOPE" or "HUGGINGFACE".

fastdeploy/input/paddleocr_vl_processor/process.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,14 +80,15 @@ def __init__(
8080
self.temporal_conv_size = self.image_processor.temporal_patch_size
8181

8282
# Special tokens and IDs
83-
self.image_token = "<|image_pad|>"
83+
84+
self.image_token = "<|IMAGE_PLACEHOLDER|>"
8485
self.video_token = "<|video_pad|>"
8586

8687
self.image_token_id = self.tokenizer.convert_tokens_to_ids(self.image_token)
8788
self.video_token_id = self.tokenizer.convert_tokens_to_ids(self.video_token)
8889
self.image_patch_id = self.image_token_id
8990

90-
self.vision_start = "<|vision_start|>"
91+
self.vision_start = "<|IMAGE_START|>"
9192
self.vision_start_id = self.tokenizer.convert_tokens_to_ids(self.vision_start)
9293

9394
self.tokens_per_second = tokens_per_second
@@ -167,9 +168,8 @@ def text2ids(self, text, images=None, videos=None):
167168
"vit_position_ids": [],
168169
}
169170
# Define placeholders and their lengths
170-
# IMAGE_PLACEHOLDER = "<|vision_start|><|image_pad|><|vision_end|>"
171-
IMAGE_PLACEHOLDER = "<|image_pad|>"
172-
VIDEO_PLACEHOLDER = "<|video@placeholder|>"
171+
IMAGE_PLACEHOLDER = self.image_token
172+
VIDEO_PLACEHOLDER = self.video_token
173173
IMAGE_PLACEHOLDER_LEN = len(IMAGE_PLACEHOLDER)
174174
VIDEO_PLACEHOLDER_LEN = len(VIDEO_PLACEHOLDER)
175175

fastdeploy/logger/logger.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def _get_legacy_logger(self, name, file_name, without_formater=False, print_to_c
104104
if not os.path.exists(log_dir):
105105
os.makedirs(log_dir, exist_ok=True)
106106

107-
is_debug = int(envs.FD_DEBUG)
107+
is_debug = envs.FD_DEBUG
108108
# logger = logging.getLogger(name)
109109
# 为了兼容原有接口,使用命名空间进行隔离,避免logger覆盖、混乱等问题
110110
legacy_name = f"legacy.{name}"

fastdeploy/model_executor/models/paddleocr_vl/config.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from paddleformers.transformers.configuration_utils import PretrainedConfig
1818

1919

20-
class PPOCRVisionConfig(PretrainedConfig):
20+
class PaddleOCRVisionConfig(PretrainedConfig):
2121
model_type = "paddleocr_vl"
2222
base_config_key = "vision_config"
2323

@@ -58,7 +58,7 @@ def __init__(
5858
class PaddleOCRConfig(PretrainedConfig):
5959
model_type = "paddleocr_vl"
6060
keys_to_ignore_at_inference = ["past_key_values"]
61-
sub_configs = {"vision_config": PPOCRVisionConfig}
61+
sub_configs = {"vision_config": PaddleOCRVisionConfig}
6262

6363
base_model_tp_plan = {
6464
"layers.*.self_attn.q_proj": "colwise",

fastdeploy/model_executor/models/paddleocr_vl/projector.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""
1616

1717
import math
18+
from typing import Optional
1819

1920
import paddle
2021
import paddle.nn as nn
@@ -57,8 +58,10 @@ def __init__(self, text_config, vision_config, prefix=""):
5758

5859
self.pre_norm = nn.LayerNorm(self.vision_config.hidden_size, epsilon=1e-05)
5960
self.linear_1 = nn.Linear(self.hidden_size, self.hidden_size)
61+
self.linear_1.weight.weight_loader = self.weight_loader
6062
self.act = GELUActivation()
6163
self.linear_2 = nn.Linear(self.hidden_size, self.text_config.hidden_size)
64+
self.linear_2.weight.weight_loader = self.weight_loader
6265

6366
def forward(self, image_features, image_grid_thw):
6467
m1, m2 = self.merge_kernel_size
@@ -94,6 +97,20 @@ def forward(self, image_features, image_grid_thw):
9497
hidden_states = self.linear_2(hidden_states)
9598
return hidden_states
9699

100+
def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = None):
    """Copy a checkpoint weight into ``param`` after swapping its two axes.

    The incoming weight is materialized with ``get_tensor``, transposed with
    the permutation ``[1, 0]``, shape-checked against ``param``, converted to
    ``param``'s dtype if the dtypes differ, and finally copied into ``param``.

    Args:
        param: Destination parameter; its ``shape`` and ``dtype`` define what
            the transposed weight must match / be converted to.
        loaded_weight: Source weight, passed through ``get_tensor`` first.
        loaded_shard_id: Not used by this loader; presumably kept so the
            signature matches other weight loaders (confirm against callers).

    Raises:
        AssertionError: If the transposed weight's shape differs from
            ``param.shape``.
    """
    weight = get_tensor(loaded_weight).transpose([1, 0])
    assert param.shape == weight.shape, (
        f" Attempted to load weight ({weight.shape}) into parameter ({param.shape})"
    )

    # Bring the weight to the parameter's dtype before copying.
    if weight.dtype != param.dtype:
        # int8 -> float8_e4m3fn goes through view() instead of cast();
        # every other dtype mismatch is converted with cast().
        use_view = weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn
        weight = weight.view(param.dtype) if use_view else weight.cast(param.dtype)

    param.copy_(weight, False)
113+
97114
def load_state_dict(self, state_dict):
98115
params_dict = dict(self.named_parameters())
99116
for param_name, param in params_dict.items():

0 commit comments

Comments
 (0)