Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from huggingface_hub import snapshot_download

# Hugging Face Hub repository to fetch, and the local destination directory.
# NOTE(review): local_dir is a hard-coded absolute path specific to one machine —
# consider making it configurable (env var / CLI arg) before committing.
repo_id: str = "baidu/ERNIE-4.5-0.3B-Paddle"
local_dir: str = "/root/wenlei07/model/ERNIE-4.5-0.3B-Paddle"

# Download the entire repository's contents into the specified directory.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

英文注释

local_path = snapshot_download(
    repo_id=repo_id,
    local_dir=local_dir,
    # Store real files in local_dir instead of symlinks into the HF cache.
    # NOTE(review): the original comment claimed this "enables resumable download
    # and better download management" — local_dir_use_symlinks only controls
    # symlinking, and is deprecated in recent huggingface_hub releases; confirm.
    local_dir_use_symlinks=False,
)

print(f"模型文件已下载到: {local_path}")
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这个文件的作用是?看还在主目录下,是多提交的吗?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

不好意思,我下载模型的脚本忘记删除了,我再提交一下

2 changes: 1 addition & 1 deletion fastdeploy/benchmarks/latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def main(args: argparse.Namespace):
# NOTE(woosuk): If the request cannot be processed in a single batch,
# the engine will automatically process the request in multiple batches.
llm = LLM(**dataclasses.asdict(engine_args))
assert llm.llm_engine.cfg.max_model_len >= (args.input_len + args.output_len), (
assert llm.llm_engine.cfg.model_config.max_model_len >= (args.input_len + args.output_len), (
"Please ensure that max_model_len is greater than" " the sum of input_len and output_len."
)

Expand Down
2 changes: 2 additions & 0 deletions fastdeploy/entrypoints/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

def main():
import fastdeploy.entrypoints.cli.benchmark.main
import fastdeploy.entrypoints.cli.collect_env
import fastdeploy.entrypoints.cli.openai
import fastdeploy.entrypoints.cli.run_batch
import fastdeploy.entrypoints.cli.serve
Expand All @@ -34,6 +35,7 @@ def main():
fastdeploy.entrypoints.cli.openai,
fastdeploy.entrypoints.cli.benchmark.main,
fastdeploy.entrypoints.cli.serve,
fastdeploy.entrypoints.cli.collect_env,
]

parser = FlexibleArgumentParser(description="FastDeploy CLI")
Expand Down
4 changes: 2 additions & 2 deletions tests/benchmarks/test_latency_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_main(self, mock_tqdm, mock_randint, mock_llm):
mock_llm_instance = MagicMock()
mock_llm.return_value = mock_llm_instance
mock_cfg = MagicMock()
mock_cfg.max_model_len = 2048
mock_cfg.model_config.max_model_len = 2048
mock_llm_instance.llm_engine.cfg = mock_cfg

mock_randint.return_value = np.zeros((8, 32))
Expand Down Expand Up @@ -74,7 +74,7 @@ def test_main_profile_error(self, mock_exit, mock_llm):
mock_llm_instance = MagicMock()
mock_llm.return_value = mock_llm_instance
mock_cfg = MagicMock()
mock_cfg.max_model_len = 2048
mock_cfg.model_config.max_model_len = 2048
mock_llm_instance.llm_engine.cfg = mock_cfg

# Build args using parser
Expand Down
Loading