Skip to content

Commit 1195182

Browse files
authored
Tiny add Engine.flush_cache API (#5241)
1 parent 5239d79 commit 1195182

File tree

5 files changed

+29 -11 lines changed

5 files changed

+29 -11 lines changed

python/sglang/srt/entrypoints/engine.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -279,6 +279,10 @@ def __exit__(self, exc_type, exc_value, traceback):
279279
self.shutdown()
280280
return False
281281

282+
def flush_cache(self):
283+
loop = asyncio.get_event_loop()
284+
return loop.run_until_complete(self.tokenizer_manager.flush_cache())
285+
282286
def start_profile(self):
283287
loop = asyncio.get_event_loop()
284288
loop.run_until_complete(self.tokenizer_manager.start_profile())

python/sglang/srt/entrypoints/http_server.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -315,11 +315,11 @@ async def classify_request(obj: EmbeddingReqInput, request: Request):
315315
@app.api_route("/flush_cache", methods=["GET", "POST"])
316316
async def flush_cache():
317317
"""Flush the radix cache."""
318-
_global_state.tokenizer_manager.flush_cache()
318+
ret = await _global_state.tokenizer_manager.flush_cache()
319319
return Response(
320320
content="Cache flushed.\nPlease check backend logs for more details. "
321321
"(When there are running or waiting requests, the operation will not be performed.)\n",
322-
status_code=200,
322+
status_code=200 if ret.success else HTTPStatus.BAD_REQUEST,
323323
)
324324

325325

python/sglang/srt/managers/io_struct.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -671,10 +671,15 @@ class BatchEmbeddingOut:
671671

672672

673673
@dataclass
674-
class FlushCacheReq:
674+
class FlushCacheReqInput:
675675
pass
676676

677677

678+
@dataclass
679+
class FlushCacheReqOutput:
680+
success: bool
681+
682+
678683
@dataclass
679684
class UpdateWeightFromDiskReqInput:
680685
# The model path with the new weights

python/sglang/srt/managers/scheduler.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@
6060
CloseSessionReqInput,
6161
ExpertDistributionReq,
6262
ExpertDistributionReqOutput,
63-
FlushCacheReq,
63+
FlushCacheReqInput,
64+
FlushCacheReqOutput,
6465
GetInternalStateReq,
6566
GetInternalStateReqOutput,
6667
GetWeightsByNameReqInput,
@@ -402,7 +403,7 @@ def __init__(
402403
[
403404
(TokenizedGenerateReqInput, self.handle_generate_request),
404405
(TokenizedEmbeddingReqInput, self.handle_embedding_request),
405-
(FlushCacheReq, self.flush_cache_wrapped),
406+
(FlushCacheReqInput, self.flush_cache_wrapped),
406407
(AbortReq, self.abort_request),
407408
(OpenSessionReqInput, self.open_session),
408409
(CloseSessionReqInput, self.close_session),
@@ -1596,8 +1597,9 @@ def watchdog_thread(self):
15961597
time.sleep(5)
15971598
self.parent_process.send_signal(signal.SIGQUIT)
15981599

1599-
def flush_cache_wrapped(self, recv_req: FlushCacheReq):
1600-
self.flush_cache()
1600+
def flush_cache_wrapped(self, recv_req: FlushCacheReqInput):
1601+
success = self.flush_cache()
1602+
return FlushCacheReqOutput(success=success)
16011603

16021604
def flush_cache(self):
16031605
"""Flush the memory pool and cache."""

python/sglang/srt/managers/tokenizer_manager.py

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,8 @@
6666
EmbeddingReqInput,
6767
ExpertDistributionReq,
6868
ExpertDistributionReqOutput,
69-
FlushCacheReq,
69+
FlushCacheReqInput,
70+
FlushCacheReqOutput,
7071
GenerateReqInput,
7172
GetInternalStateReq,
7273
GetInternalStateReqOutput,
@@ -264,6 +265,9 @@ def __init__(
264265
self.resume_memory_occupation_communicator = _Communicator(
265266
self.send_to_scheduler, server_args.dp_size
266267
)
268+
self.flush_cache_communicator = _Communicator(
269+
self.send_to_scheduler, server_args.dp_size
270+
)
267271
self.start_profile_communicator = _Communicator(
268272
self.send_to_scheduler, server_args.dp_size
269273
)
@@ -314,6 +318,10 @@ def __init__(
314318
ResumeMemoryOccupationReqOutput,
315319
self.resume_memory_occupation_communicator.handle_recv,
316320
),
321+
(
322+
FlushCacheReqOutput,
323+
self.flush_cache_communicator.handle_recv,
324+
),
317325
(
318326
ProfileReqOutput,
319327
self.start_profile_communicator.handle_recv,
@@ -707,9 +715,8 @@ async def _handle_batch_request(
707715
except StopAsyncIteration:
708716
pass
709717

710-
def flush_cache(self):
711-
req = FlushCacheReq()
712-
self.send_to_scheduler.send_pyobj(req)
718+
async def flush_cache(self) -> FlushCacheReqOutput:
719+
return await self.flush_cache_communicator(FlushCacheReqInput())
713720

714721
def abort_request(self, rid: str):
715722
if rid not in self.rid_to_state:

0 commit comments

Comments
 (0)