Skip to content

Commit 8b0ce8e

Browse files
authored
[Feature] add cli command serve (#4226)
1 parent 9566ae8 commit 8b0ce8e

File tree

12 files changed

+232
-36
lines changed

12 files changed

+232
-36
lines changed

fastdeploy/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
from fastdeploy.engine.sampling_params import SamplingParams
3030
from fastdeploy.entrypoints.llm import LLM
31-
from fastdeploy.utils import envs
31+
from fastdeploy.utils import current_package_version, envs
3232

3333
if envs.FD_DEBUG != "1":
3434
import logging
@@ -43,6 +43,8 @@
4343
pass
4444
# TODO(tangbinhan): remove this code
4545

46+
__version__ = current_package_version()
47+
4648

4749
def _patch_fastsafetensors():
4850
try:

fastdeploy/entrypoints/cli/main.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,27 @@
1717
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py
1818
from __future__ import annotations
1919

20-
import importlib.metadata
20+
from fastdeploy import __version__
2121

2222

2323
def main():
2424
import fastdeploy.entrypoints.cli.benchmark.main
2525
import fastdeploy.entrypoints.cli.openai
26+
import fastdeploy.entrypoints.cli.serve
2627
from fastdeploy.utils import FlexibleArgumentParser
2728

2829
CMD_MODULES = [
2930
fastdeploy.entrypoints.cli.openai,
3031
fastdeploy.entrypoints.cli.benchmark.main,
32+
fastdeploy.entrypoints.cli.serve,
3133
]
3234

3335
parser = FlexibleArgumentParser(description="FastDeploy CLI")
3436
parser.add_argument(
3537
"-v",
3638
"--version",
3739
action="version",
38-
version=importlib.metadata.version("fastdeploy-gpu"),
40+
version=__version__,
3941
)
4042
subparsers = parser.add_subparsers(required=False, dest="subparser")
4143
cmds = {}

fastdeploy/entrypoints/cli/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def _add_query_options(parser: FlexibleArgumentParser) -> FlexibleArgumentParser
8686
parser.add_argument(
8787
"--url",
8888
type=str,
89-
default="http://localhost:9904/v1",
89+
default="http://localhost:8000/v1",
9090
help="url of the running OpenAI-Compatible RESTful API server",
9191
)
9292
parser.add_argument(
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""
16+
17+
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/serve.py
18+
19+
import argparse
20+
import atexit
21+
import os
22+
import signal
23+
import subprocess
24+
import sys
25+
26+
from fastdeploy.entrypoints.cli.types import CLISubcommand
27+
from fastdeploy.entrypoints.openai.utils import make_arg_parser
28+
from fastdeploy.utils import FlexibleArgumentParser
29+
30+
31+
class ServeSubcommand(CLISubcommand):
    """The `serve` subcommand for the fastdeploy CLI."""

    name = "serve"

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run the OpenAI-compatible API server in a child process and wait for it.

        The child is started as ``python -m fastdeploy.entrypoints.openai.api_server``
        with a copy of the current environment and receives every command-line
        token after ``fastdeploy serve`` (``sys.argv[2:]``) unchanged. The parsed
        ``args`` namespace is not consulted here.

        On SIGINT or SIGTERM the child is asked to terminate and this process
        waits for it to exit before exiting with status 0. A further signal
        received while waiting force-kills the child.

        Args:
            args: Parsed CLI arguments (unused; the raw argv is forwarded).
        """
        env = os.environ.copy()
        cmd = [
            sys.executable,
            "-m",
            "fastdeploy.entrypoints.openai.api_server",
            *sys.argv[2:],
        ]

        # Launch the server as a child process.
        proc = subprocess.Popen(cmd, env=env)
        print(f"Starting server (PID: {proc.pid})")

        def cleanup():
            """Ask the child to terminate if it is still running."""
            if proc.poll() is None:
                print(f"\nTerminating child process (PID: {proc.pid})...")
                proc.terminate()

        # Make sure the child is signalled even when the interpreter exits
        # through a path other than the signal handlers below.
        atexit.register(cleanup)

        def signal_handler(signum, frame):
            cleanup()
            sys.exit(0)

        # Handle SIGINT (Ctrl+C) and SIGTERM.
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        try:
            # Block until the child exits.
            proc.wait()
        finally:
            # The child can still be running here only when a signal handler
            # raised SystemExit out of the wait above. Waiting is done here,
            # not inside the handler, because the handler runs while that
            # interrupted wait is still on the stack.
            if proc.returncode is None:
                try:
                    # Give the child time to shut down after terminate().
                    proc.wait()
                finally:
                    # A second signal interrupted the wait: stop being polite
                    # so the child is not left running after this process exits.
                    if proc.poll() is None:
                        proc.kill()
                        proc.wait()

    def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        """Create the ``serve`` sub-parser and register its options.

        Args:
            subparsers: The sub-parsers action of the top-level CLI parser.

        Returns:
            The ``serve`` parser, populated by ``make_arg_parser`` plus a
            ``--config`` option.
        """
        serve_parser = subparsers.add_parser(
            name=self.name,
            help="Start the FastDeploy OpenAI Compatible API server.",
            description="Start the FastDeploy OpenAI Compatible API server.",
            usage="fastdeploy serve [model_tag] [options]",
        )
        serve_parser = make_arg_parser(serve_parser)
        serve_parser.add_argument("--config", help="Read CLI options from a config file. Must be a YAML file")
        return serve_parser
81+
82+
83+
def cmd_init() -> list[CLISubcommand]:
    """Return a list holding one freshly created ``ServeSubcommand``."""
    serve_subcommand = ServeSubcommand()
    return [serve_subcommand]

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
5050
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
5151
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
52-
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG
52+
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
5353
from fastdeploy.metrics.metrics import (
5454
EXCLUDE_LABELS,
5555
cleanup_prometheus_files,
@@ -67,31 +67,7 @@
6767
retrive_model_from_server,
6868
)
6969

70-
parser = FlexibleArgumentParser()
71-
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
72-
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
73-
parser.add_argument("--workers", default=1, type=int, help="number of workers")
74-
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
75-
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
76-
parser.add_argument(
77-
"--max-waiting-time",
78-
default=-1,
79-
type=int,
80-
help="max waiting time for connection, if set value -1 means no waiting time limit",
81-
)
82-
parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency")
83-
84-
parser.add_argument(
85-
"--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. "
86-
)
87-
parser.add_argument(
88-
"--timeout-graceful-shutdown",
89-
default=0,
90-
type=int,
91-
help="timeout for graceful shutdown in seconds (used by uvicorn)",
92-
)
93-
94-
parser = EngineArgs.add_cli_args(parser)
70+
parser = make_arg_parser(FlexibleArgumentParser())
9571
args = parser.parse_args()
9672

9773
console_logger.info(f"Number of api-server workers: {args.workers}.")

fastdeploy/entrypoints/openai/utils.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
import msgpack
2323
import zmq
2424

25-
from fastdeploy.utils import api_server_logger
25+
from fastdeploy.engine.args_utils import EngineArgs
26+
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
2627

2728
UVICORN_CONFIG = {
2829
"version": 1,
@@ -201,3 +202,31 @@ async def close(self):
201202
self.request_map.clear()
202203

203204
api_server_logger.info("All connections and tasks closed")
205+
206+
207+
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    """Register the API-server options and then the engine options on ``parser``.

    Args:
        parser: The parser to extend.

    Returns:
        Whatever ``EngineArgs.add_cli_args`` returns for the extended parser.
    """
    # (flag, add_argument keyword arguments), in registration order.
    server_options = (
        ("--port", {"default": 8000, "type": int, "help": "port to the http server"}),
        ("--host", {"default": "0.0.0.0", "type": str, "help": "host to the http server"}),
        ("--workers", {"default": 1, "type": int, "help": "number of workers"}),
        ("--metrics-port", {"default": 8001, "type": int, "help": "port for metrics server"}),
        ("--controller-port", {"default": -1, "type": int, "help": "port for controller server"}),
        (
            "--max-waiting-time",
            {
                "default": -1,
                "type": int,
                "help": "max waiting time for connection, if set value -1 means no waiting time limit",
            },
        ),
        ("--max-concurrency", {"default": 512, "type": int, "help": "max concurrency"}),
        (
            "--enable-mm-output",
            {"action": "store_true", "help": "Enable 'multimodal_content' field in response output. "},
        ),
        (
            "--timeout-graceful-shutdown",
            {
                "default": 0,
                "type": int,
                "help": "timeout for graceful shutdown in seconds (used by uvicorn)",
            },
        ),
    )
    for flag, options in server_options:
        parser.add_argument(flag, **options)

    # Engine options are registered last, after all server options.
    return EngineArgs.add_cli_args(parser)

fastdeploy/plugins/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
3232

3333
discovered_plugins = entry_points(group=group)
3434
if len(discovered_plugins) == 0:
35-
logger.info("No plugins for group %s found.", group)
35+
logger.debug("No plugins for group %s found.", group)
3636
return {}
3737

3838
logger.info("Available plugins for group %s:", group)

fastdeploy/utils.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,36 @@ def version():
757757
return content
758758

759759

760+
def current_package_version():
    """
    Return the FastDeploy version recorded in version.txt.

    The text returned by ``version()`` is scanned for lines that start with
    "fastdeploy version:". When several such lines exist, the last one is used.

    Args:
    Returns:
        str: The FastDeploy version string, or "Unknown" when it cannot be
            determined. This function never raises.
    """
    prefix = "fastdeploy version:"
    fd_version = "Unknown"
    try:
        content = version()
        if content == "Unknown":
            return fd_version

        # version.txt may hold several version lines (for example when a build
        # appended to an existing file); the last one is the most recent.
        matches = [line for line in content.strip().split("\n") if line.startswith(prefix)]
        if matches:
            return matches[-1][len(prefix) :].strip()

        llm_logger.warning("fastdeploy version not found in version.txt")
        # No matching line was found: fall back to "Unknown".
        return fd_version
    except Exception as e:
        # Best effort only: a failure here is logged and reported as "Unknown".
        llm_logger.error(f"Failed to parse fastdeploy version from version.txt: {e}")
        return fd_version
788+
789+
760790
class DeprecatedOptionWarning(argparse.Action):
761791
def __init__(self, option_strings, dest, **kwargs):
762792
super().__init__(option_strings, dest, nargs=0, **kwargs)

setup.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,16 @@ def get_name():
190190
FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev")
191191
cmdclass_dict["build_optl"] = PostInstallCommand
192192

193+
194+
def write_version_to_file():
    """Record ``FASTDEPLOY_VERSION`` in ``fastdeploy/version.txt`` next to this file.

    Other lines already in the file are kept. Any earlier
    "fastdeploy version:" line is replaced, so repeated runs leave exactly
    one version entry, always on a line of its own. The file is created if
    it does not exist.
    """
    prefix = "fastdeploy version:"
    current_dir = os.path.dirname(os.path.abspath(__file__))
    version_file_path = os.path.join(current_dir, "fastdeploy", "version.txt")

    try:
        with open(version_file_path) as f:
            existing_lines = f.read().splitlines()
    except FileNotFoundError:
        existing_lines = []

    # Drop stale version entries; blindly appending would add one per run and
    # could glue the entry onto a last line that lacks a trailing newline.
    kept_lines = [line for line in existing_lines if not line.startswith(prefix)]
    kept_lines.append(f"{prefix} {FASTDEPLOY_VERSION}")

    with open(version_file_path, "w") as f:
        f.write("\n".join(kept_lines) + "\n")
199+
200+
201+
write_version_to_file()
202+
193203
setup(
194204
name=get_name(),
195205
version=FASTDEPLOY_VERSION,

tests/entrypoints/cli/test_main.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@
66

77
class TestCliMain(unittest.TestCase):
88
@patch("fastdeploy.utils.FlexibleArgumentParser")
9-
@patch("fastdeploy.entrypoints.cli.main.importlib.metadata")
10-
def test_main_basic(self, mock_metadata, mock_parser):
9+
def test_main_basic(self, mock_parser):
1110
# Setup mocks
12-
mock_metadata.version.return_value = "1.0.0"
1311
mock_args = MagicMock()
1412
mock_args.subparser = None
1513
mock_parser.return_value.parse_args.return_value = mock_args
@@ -18,7 +16,6 @@ def test_main_basic(self, mock_metadata, mock_parser):
1816
cli_main()
1917

2018
# Verify the dispatch function was invoked
21-
mock_metadata.version.assert_called_once_with("fastdeploy-gpu")
2219
mock_args.dispatch_function.assert_called_once()
2320

2421

0 commit comments

Comments
 (0)