Skip to content

Commit 7e6229c

Browse files
committed
[Feature] add cli command serve
1 parent c96a535 commit 7e6229c

File tree

10 files changed

+174
-36
lines changed

10 files changed

+174
-36
lines changed

fastdeploy/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
pass
4444
# TODO(tangbinhan): remove this code
4545

46+
__version__ = "2.3.0-dev"
47+
4648

4749
def _patch_fastsafetensors():
4850
try:

fastdeploy/entrypoints/cli/main.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,27 @@
1717
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/main.py
1818
from __future__ import annotations
1919

20-
import importlib.metadata
20+
from fastdeploy import __version__
2121

2222

2323
def main():
2424
import fastdeploy.entrypoints.cli.benchmark.main
2525
import fastdeploy.entrypoints.cli.openai
26+
import fastdeploy.entrypoints.cli.serve
2627
from fastdeploy.utils import FlexibleArgumentParser
2728

2829
CMD_MODULES = [
2930
fastdeploy.entrypoints.cli.openai,
3031
fastdeploy.entrypoints.cli.benchmark.main,
32+
fastdeploy.entrypoints.cli.serve,
3133
]
3234

3335
parser = FlexibleArgumentParser(description="FastDeploy CLI")
3436
parser.add_argument(
3537
"-v",
3638
"--version",
3739
action="version",
38-
version=importlib.metadata.version("fastdeploy-gpu"),
40+
version=__version__,
3941
)
4042
subparsers = parser.add_subparsers(required=False, dest="subparser")
4143
cmds = {}

fastdeploy/entrypoints/cli/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def _add_query_options(parser: FlexibleArgumentParser) -> FlexibleArgumentParser
8686
parser.add_argument(
8787
"--url",
8888
type=str,
89-
default="http://localhost:9904/v1",
89+
default="http://localhost:8000/v1",
9090
help="url of the running OpenAI-Compatible RESTful API server",
9191
)
9292
parser.add_argument(
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
"""
2+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""
16+
17+
# This file is modified from https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/cli/serve.py
18+
19+
import argparse
20+
import atexit
21+
import os
22+
import signal
23+
import subprocess
24+
import sys
25+
26+
from fastdeploy.entrypoints.cli.types import CLISubcommand
27+
from fastdeploy.entrypoints.openai.utils import make_arg_parser
28+
from fastdeploy.utils import FlexibleArgumentParser
29+
30+
31+
class ServeSubcommand(CLISubcommand):
    """The `serve` subcommand for the fastdeploy CLI."""

    name = "serve"

    @staticmethod
    def cmd(args: argparse.Namespace) -> None:
        """Run the OpenAI-compatible API server as a child process and wait for it.

        The child is ``python -m fastdeploy.entrypoints.openai.api_server`` started
        with a copy of the current environment. Every command-line token after
        ``fastdeploy serve`` (``sys.argv[2:]``) is forwarded to it unchanged; the
        parsed ``args`` namespace itself is not consulted.

        The call blocks until the child exits. On SIGINT/SIGTERM, or on any other
        exit path, the child is asked to terminate, given a bounded grace period,
        and force-killed if it is still alive afterwards.

        Args:
            args: Parsed CLI arguments (unused; the raw argv is forwarded instead).
        """
        env = os.environ.copy()
        cmd = [
            sys.executable,
            "-m",
            "fastdeploy.entrypoints.openai.api_server",
            *sys.argv[2:],
        ]

        # Launch the server as a child process.
        proc = subprocess.Popen(cmd, env=env)
        print(f"Starting server (PID: {proc.pid})")

        # Seconds the child gets to shut down after SIGTERM before it is killed.
        grace_period = 30.0

        def cleanup():
            """Terminate the child if it is still running and reap it."""
            if proc.poll() is not None:  # child has already exited
                return
            print(f"\nTerminating child process (PID: {proc.pid})...")
            proc.terminate()  # polite request first
            try:
                proc.wait(timeout=grace_period)
            except subprocess.TimeoutExpired:
                # The child ignored the termination request; do not leave it behind.
                proc.kill()
                proc.wait()

        # Safety net for exit paths that bypass the try/finally below.
        atexit.register(cleanup)

        def signal_handler(signum, frame):
            # Only unwind here. The handler interrupts the main thread while it is
            # blocked in proc.wait(), so waiting on the child again from inside the
            # handler is unsafe; the `finally` below performs the actual teardown
            # once that wait has been unwound.
            sys.exit(0)

        # Handle SIGINT (Ctrl+C) and SIGTERM.
        signal.signal(signal.SIGINT, signal_handler)
        signal.signal(signal.SIGTERM, signal_handler)

        try:
            # Block the parent until the server exits.
            proc.wait()
        finally:
            cleanup()

    def subparser_init(self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser:
        """Register the ``serve`` subparser and its options.

        Args:
            subparsers: The sub-parsers action of the top-level CLI parser.

        Returns:
            The ``serve`` parser, populated by ``make_arg_parser`` and extended
            with a ``--config`` option.
        """
        serve_parser = subparsers.add_parser(
            name=self.name,
            help="Start the FastDeploy OpenAI Compatible API server.",
            description="Start the FastDeploy OpenAI Compatible API server.",
            usage="fastdeploy serve [model_tag] [options]",
        )
        serve_parser = make_arg_parser(serve_parser)
        serve_parser.add_argument("--config", help="Read CLI options from a config file. Must be a YAML file")
        return serve_parser
81+
82+
83+
def cmd_init() -> list[CLISubcommand]:
    """Build the subcommands this module contributes to the CLI.

    Returns:
        A one-element list holding a new ``ServeSubcommand`` instance.
    """
    serve_subcommand = ServeSubcommand()
    return [serve_subcommand]

fastdeploy/entrypoints/openai/api_server.py

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
5050
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
5151
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
52-
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG
52+
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
5353
from fastdeploy.metrics.metrics import (
5454
EXCLUDE_LABELS,
5555
cleanup_prometheus_files,
@@ -67,31 +67,7 @@
6767
retrive_model_from_server,
6868
)
6969

70-
parser = FlexibleArgumentParser()
71-
parser.add_argument("--port", default=8000, type=int, help="port to the http server")
72-
parser.add_argument("--host", default="0.0.0.0", type=str, help="host to the http server")
73-
parser.add_argument("--workers", default=1, type=int, help="number of workers")
74-
parser.add_argument("--metrics-port", default=8001, type=int, help="port for metrics server")
75-
parser.add_argument("--controller-port", default=-1, type=int, help="port for controller server")
76-
parser.add_argument(
77-
"--max-waiting-time",
78-
default=-1,
79-
type=int,
80-
help="max waiting time for connection, if set value -1 means no waiting time limit",
81-
)
82-
parser.add_argument("--max-concurrency", default=512, type=int, help="max concurrency")
83-
84-
parser.add_argument(
85-
"--enable-mm-output", action="store_true", help="Enable 'multimodal_content' field in response output. "
86-
)
87-
parser.add_argument(
88-
"--timeout-graceful-shutdown",
89-
default=0,
90-
type=int,
91-
help="timeout for graceful shutdown in seconds (used by uvicorn)",
92-
)
93-
94-
parser = EngineArgs.add_cli_args(parser)
70+
parser = make_arg_parser(FlexibleArgumentParser())
9571
args = parser.parse_args()
9672

9773
console_logger.info(f"Number of api-server workers: {args.workers}.")

fastdeploy/entrypoints/openai/utils.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
import msgpack
2323
import zmq
2424

25-
from fastdeploy.utils import api_server_logger
25+
from fastdeploy.engine.args_utils import EngineArgs
26+
from fastdeploy.utils import FlexibleArgumentParser, api_server_logger
2627

2728
UVICORN_CONFIG = {
2829
"version": 1,
@@ -201,3 +202,31 @@ async def close(self):
201202
self.request_map.clear()
202203

203204
api_server_logger.info("All connections and tasks closed")
205+
206+
207+
def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
    """Register the API-server options on ``parser``, then the engine options.

    Args:
        parser: The parser to extend; options are added to it in place.

    Returns:
        Whatever ``EngineArgs.add_cli_args`` returns for the extended parser.
    """
    # (flag, add_argument keyword arguments), in the order they are registered.
    server_options = (
        ("--port", {"default": 8000, "type": int, "help": "port to the http server"}),
        ("--host", {"default": "0.0.0.0", "type": str, "help": "host to the http server"}),
        ("--workers", {"default": 1, "type": int, "help": "number of workers"}),
        ("--metrics-port", {"default": 8001, "type": int, "help": "port for metrics server"}),
        ("--controller-port", {"default": -1, "type": int, "help": "port for controller server"}),
        (
            "--max-waiting-time",
            {
                "default": -1,
                "type": int,
                "help": "max waiting time for connection, if set value -1 means no waiting time limit",
            },
        ),
        ("--max-concurrency", {"default": 512, "type": int, "help": "max concurrency"}),
        (
            "--enable-mm-output",
            {"action": "store_true", "help": "Enable 'multimodal_content' field in response output. "},
        ),
        (
            "--timeout-graceful-shutdown",
            {
                "default": 0,
                "type": int,
                "help": "timeout for graceful shutdown in seconds (used by uvicorn)",
            },
        ),
    )
    for flag, options in server_options:
        parser.add_argument(flag, **options)

    # Engine options are appended after the server options.
    return EngineArgs.add_cli_args(parser)

fastdeploy/plugins/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def load_plugins_by_group(group: str) -> dict[str, Callable[[], Any]]:
3232

3333
discovered_plugins = entry_points(group=group)
3434
if len(discovered_plugins) == 0:
35-
logger.info("No plugins for group %s found.", group)
35+
logger.debug("No plugins for group %s found.", group)
3636
return {}
3737

3838
logger.info("Available plugins for group %s:", group)

setup.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
from setuptools.command.install import install
2727
from wheel.bdist_wheel import bdist_wheel
2828

29+
from fastdeploy import __version__
30+
2931
long_description = "FastDeploy: Large Language Model Serving.\n\n"
3032
long_description += "GitHub: https://github.com/PaddlePaddle/FastDeploy\n"
3133
long_description += "Email: [email protected]"
@@ -185,7 +187,7 @@ def get_name():
185187

186188
cmdclass_dict = {"bdist_wheel": CustomBdistWheel}
187189
cmdclass_dict["build_ext"] = CMakeBuild
188-
FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", "2.3.0-dev")
190+
FASTDEPLOY_VERSION = os.environ.get("FASTDEPLOY_VERSION", __version__)
189191
cmdclass_dict["build_optl"] = PostInstallCommand
190192

191193
setup(

tests/entrypoints/cli/test_main.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,8 @@
66

77
class TestCliMain(unittest.TestCase):
88
@patch("fastdeploy.utils.FlexibleArgumentParser")
9-
@patch("fastdeploy.entrypoints.cli.main.importlib.metadata")
10-
def test_main_basic(self, mock_metadata, mock_parser):
9+
def test_main_basic(self, mock_parser):
1110
# Setup mocks
12-
mock_metadata.version.return_value = "1.0.0"
1311
mock_args = MagicMock()
1412
mock_args.subparser = None
1513
mock_parser.return_value.parse_args.return_value = mock_args
@@ -18,7 +16,6 @@ def test_main_basic(self, mock_metadata, mock_parser):
1816
cli_main()
1917

2018
# Verify version check
21-
mock_metadata.version.assert_called_once_with("fastdeploy-gpu")
2219
mock_args.dispatch_function.assert_called_once()
2320

2421

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import argparse
2+
import unittest
3+
from unittest.mock import MagicMock, patch
4+
5+
from fastdeploy.entrypoints.cli.serve import ServeSubcommand, cmd_init
6+
7+
8+
class TestServeSubcommand(unittest.TestCase):
    """Tests for ServeSubcommand class."""

    def test_name_property(self):
        """Test the name property is correctly set."""
        self.assertEqual(ServeSubcommand.name, "serve")

    # sys.argv is pinned so the forwarded arguments are deterministic, and
    # signal.signal / atexit.register are patched so the test does not install
    # real handlers or exit hooks in the test runner process.
    @patch("sys.argv", ["fastdeploy", "serve", "--port", "8000"])
    @patch("atexit.register")
    @patch("signal.signal")
    @patch("subprocess.Popen", return_value=MagicMock())
    def test_cmd_method(self, mock_subprocess, mock_signal, mock_atexit):
        """Test cmd launches the API server module and forwards the CLI arguments."""
        test_args = argparse.Namespace(port=8000)
        mock_subprocess.return_value.pid = 1
        ServeSubcommand.cmd(test_args)

        mock_subprocess.assert_called_once()
        launched = mock_subprocess.call_args[0][0]
        self.assertIn("fastdeploy.entrypoints.openai.api_server", launched)
        # Everything after `fastdeploy serve` must reach the child unchanged.
        self.assertEqual(launched[-2:], ["--port", "8000"])
        # The parent must block on the child.
        mock_subprocess.return_value.wait.assert_called()

    def test_validate_method(self):
        """Test the validate method does nothing (no-op)."""
        test_args = argparse.Namespace()
        instance = ServeSubcommand()
        instance.validate(test_args)  # Should not raise any exceptions

    def test_subparser_init(self):
        """Test the subparser is registered under the subcommand's name."""
        mock_subparsers = MagicMock()
        instance = ServeSubcommand()
        result = instance.subparser_init(mock_subparsers)
        self.assertIsNotNone(result)
        mock_subparsers.add_parser.assert_called_once()
        self.assertEqual(mock_subparsers.add_parser.call_args[1]["name"], "serve")

    def test_cmd_init_returns_list(self):
        """Test cmd_init returns a list of subcommands."""
        result = cmd_init()
        self.assertIsInstance(result, list)
        self.assertEqual(len(result), 1)
        self.assertIsInstance(result[0], ServeSubcommand)
43+
44+
45+
if __name__ == "__main__":
46+
unittest.main()

0 commit comments

Comments
 (0)