Skip to content

Commit b40ec2d

Browse files
sawsa307 authored and Fridge003 committed
Migrate all callers from /get_server_info to /server_info (#21463)
1 parent 4dfdab6 commit b40ec2d

48 files changed

Lines changed: 74 additions & 70 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

docs/advanced_features/server_arguments.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,7 @@ Please consult the documentation below and [server_args.py](https://github.com/s
212212
| Argument | Description | Defaults | Options |
213213
| --- | --- | --- | --- |
214214
| `--api-key` | Set API key of the server. It is also used in the OpenAI API compatible server. | `None` | Type: str |
215-
| `--admin-api-key` | Set **admin API key** for administrative/control endpoints (e.g., weights update, cache flush, `/get_server_info`). Endpoints marked as admin-only require `Authorization: Bearer <admin_api_key>` when this is set. | `None` | Type: str |
215+
| `--admin-api-key` | Set **admin API key** for administrative/control endpoints (e.g., weights update, cache flush, `/server_info`). Endpoints marked as admin-only require `Authorization: Bearer <admin_api_key>` when this is set. | `None` | Type: str |
216216
| `--served-model-name` | Override the model name returned by the v1/models endpoint in OpenAI API server. | `None` | Type: str |
217217
| `--weight-version` | Version identifier for the model weights. Defaults to 'default' if not specified. | `default` | Type: str |
218218
| `--chat-template` | The builtin chat template name or the path of the chat template file. This is only used for OpenAI-compatible API server. | `None` | Type: str |

docs/advanced_features/sgl_model_gateway.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -77,7 +77,7 @@ SGLang Model Gateway is a high-performance model-routing gateway for large-scale
7777

7878
### Control Plane
7979

80-
- **Worker Manager** discovers capabilities (`/get_server_info`, `/get_model_info`), tracks load, and registers/removes workers in the shared registry.
80+
- **Worker Manager** discovers capabilities (`/server_info`, `/get_model_info`), tracks load, and registers/removes workers in the shared registry.
8181
- **Job Queue** serializes add/remove requests and exposes status (`/workers/{worker_id}`) so clients can track onboarding progress.
8282
- **Load Monitor** feeds cache-aware and power-of-two policies with live worker load statistics.
8383
- **Health Checker** continuously probes workers and updates readiness, circuit breaker state, and router metrics.
@@ -552,7 +552,7 @@ Response:
552552
| `GET` | `/engine_metrics` | Engine-level metrics from workers |
553553
| `GET` | `/v1/models` | List available models |
554554
| `GET` | `/get_model_info` | Get model information |
555-
| `GET` | `/get_server_info` | Get server information |
555+
| `GET` | `/server_info` | Get server information |
556556
| `POST` | `/flush_cache` | Clear all caches |
557557
| `GET` | `/get_loads` | Get all worker loads |
558558
| `POST` | `/wasm` | Upload WASM module |

docs/basic_usage/native_api.ipynb

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
"\n",
1111
"- `/generate` (text generation model)\n",
1212
"- `/get_model_info`\n",
13-
"- `/get_server_info`\n",
13+
"- `/server_info`\n",
1414
"- `/health`\n",
1515
"- `/health_generate`\n",
1616
"- `/flush_cache`\n",
@@ -140,7 +140,7 @@
140140
"metadata": {},
141141
"outputs": [],
142142
"source": [
143-
"url = f\"http://localhost:{port}/get_server_info\"\n",
143+
"url = f\"http://localhost:{port}/server_info\"\n",
144144
"\n",
145145
"response = requests.get(url)\n",
146146
"print_highlight(response.text)"

docs/developer_guide/bench_serving.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -352,4 +352,4 @@ python3 -m sglang.bench_serving \
352352
### Notes
353353

354354
- The script raises the file descriptor soft limit (`RLIMIT_NOFILE`) to help with many concurrent connections.
355-
- For sglang, `/get_server_info` is queried post-run to report speculative decoding accept length when available.
355+
- For sglang, `/server_info` is queried post-run to report speculative decoding accept length when available.

python/sglang/bench_serving.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1402,7 +1402,7 @@ async def limited_request_func(request_func_input, pbar):
14021402

14031403
if "sglang" in backend:
14041404
server_info = requests.get(
1405-
base_url + "/get_server_info", headers=get_auth_headers()
1405+
base_url + "/server_info", headers=get_auth_headers()
14061406
)
14071407
if server_info.status_code == 200:
14081408
server_info_json = server_info.json()
@@ -1538,7 +1538,7 @@ async def limited_request_func(request_func_input, pbar):
15381538
print("{:<40} {:<10.2f}".format("Max ITL (ms):", metrics.max_itl_ms))
15391539
print("=" * 50)
15401540

1541-
resp = requests.get(base_url + "/get_server_info", headers=get_auth_headers())
1541+
resp = requests.get(base_url + "/server_info", headers=get_auth_headers())
15421542
server_info = resp.json() if resp.status_code == 200 else None
15431543

15441544
if (

python/sglang/lang/backend/runtime_endpoint.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ def flush_cache(self):
6767

6868
def get_server_info(self):
6969
res = http_request(
70-
self.base_url + "/get_server_info",
70+
self.base_url + "/server_info",
7171
api_key=self.api_key,
7272
verify=self.verify,
7373
)
@@ -531,7 +531,7 @@ def encode(
531531

532532
async def get_server_info(self):
533533
async with aiohttp.ClientSession() as session:
534-
async with session.get(f"{self.url}/get_server_info") as response:
534+
async with session.get(f"{self.url}/server_info") as response:
535535
if response.status == 200:
536536
return await response.json()
537537
else:

python/sglang/profiler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def run_profile(
4242
# Dump server args.
4343
file_path = Path(output_dir) / "server_args.json"
4444
if not file_path.exists():
45-
response = requests.get(url + "/get_server_info")
45+
response = requests.get(url + "/server_info")
4646
response.raise_for_status()
4747
server_args_data = response.json()
4848
with open(file_path, "w") as file:

python/sglang/test/bench_one_batch_server_internal.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -609,7 +609,7 @@ def run_one_case(
609609
last_gen_throughput = -1
610610
acc_length = -1
611611
else:
612-
response = requests.get(url + "/get_server_info", timeout=DEFAULT_TIMEOUT)
612+
response = requests.get(url + "/server_info", timeout=DEFAULT_TIMEOUT)
613613
response.raise_for_status()
614614
server_info = response.json()
615615
internal_state = server_info.get("internal_states", [{}])
@@ -793,7 +793,7 @@ def run_benchmark_internal(
793793
skip_max_running_requests_threshold = float("inf")
794794
else:
795795
model_name = None
796-
response = requests.get(base_url + "/get_server_info", timeout=DEFAULT_TIMEOUT)
796+
response = requests.get(base_url + "/server_info", timeout=DEFAULT_TIMEOUT)
797797
response.raise_for_status()
798798
server_info = response.json()
799799
if "tokenizer_path" in server_info:

python/sglang/test/kits/cache_hit_kit.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,7 +221,7 @@ async def _send_one(payload):
221221
def _get_page_size(base_url: str) -> int:
222222
"""Query server for page_size used by radix cache."""
223223
try:
224-
resp = requests.get(f"{base_url}/get_server_info", timeout=10)
224+
resp = requests.get(f"{base_url}/server_info", timeout=10)
225225
resp.raise_for_status()
226226
info = resp.json()
227227
return info.get("page_size", 1)

python/sglang/test/kl_test_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ def test_input_output_logprobs_match_helper(
208208
def test_input_output_logprobs_match_prefill_cache_hit_helper(
209209
base_url, ACC_THRESHOLDS, model_name, max_samples=None, max_new_tokens=8192
210210
):
211-
server_info = requests.get(base_url + "/get_server_info").json()
211+
server_info = requests.get(base_url + "/server_info").json()
212212
if server_info["disable_radix_cache"]:
213213
print("Radix cache is disabled, skipping test")
214214
return
@@ -261,7 +261,7 @@ def test_input_output_logprobs_match_prefill_cache_hit_helper(
261261
def test_input_output_logprobs_match_decode_cache_hit_helper(
262262
base_url, ACC_THRESHOLDS, model_name, max_samples=None, max_new_tokens=8192
263263
):
264-
server_info = requests.get(base_url + "/get_server_info").json()
264+
server_info = requests.get(base_url + "/server_info").json()
265265
if server_info["disable_radix_cache"]:
266266
print("Radix cache is disabled, skipping test")
267267
return

0 commit comments

Comments
 (0)