Skip to content

Commit 211a0ef

Browse files
authored
feature: add in-tree BackendRuntime for preStop Hook (#319)
1 parent 5adc074 commit 211a0ef

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

chart/templates/backends/vllm.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,24 @@ spec:
1414
- vllm.entrypoints.openai.api_server
1515
image: vllm/vllm-openai
1616
version: v0.7.3
17+
lifecycle:
18+
preStop:
19+
exec:
20+
command:
21+
- /bin/sh
22+
- -c
23+
- |
24+
# preStop drain loop: block until vLLM reports no running and no waiting
# requests, then exit 0.
#
# Runs under `/bin/sh -c`, so only POSIX sh constructs are used.
# Status lines are appended to /proc/1/fd/1, i.e. the stdout of PID 1
# (presumably so they show up in the container log -- confirm).
# NOTE(review): this script lives under chart/templates, so keep awk braces
# single; double braces would presumably be parsed as Helm template actions.
FAILURES=0
MAX_FAILURES=3
while true; do
  # Scrape once per iteration so both gauges come from the same snapshot.
  # -f turns HTTP errors into a non-zero exit; --max-time keeps a hung
  # server from blocking the hook.
  if METRICS=$(curl -sf --max-time 5 http://localhost:8000/metrics); then
    # Sum each gauge across every label set and compare numerically, so
    # "0", "0.0" and multi-series output are all handled. Nothing is
    # printed when either gauge is missing from the scrape.
    COUNTS=$(printf '%s\n' "$METRICS" | awk '
      /^#/ { next }
      { sub(/\{[^}]*\}/, "") }
      $1 == "vllm:num_requests_running" { running += $2; has_running = 1 }
      $1 == "vllm:num_requests_waiting" { waiting += $2; has_waiting = 1 }
      END {
        if (has_running && has_waiting) printf "%d %d\n", running, waiting
      }')
  else
    COUNTS=""
  fi

  if [ -z "$COUNTS" ]; then
    # Endpoint unreachable or gauges absent: there is nothing we can
    # observe draining, so stop waiting after a few consecutive failures
    # instead of spinning until the grace period expires.
    FAILURES=$((FAILURES + 1))
    if [ "$FAILURES" -ge "$MAX_FAILURES" ]; then
      echo "Terminating: metrics unavailable after $FAILURES attempts, giving up waiting" >> /proc/1/fd/1
      exit 0
    fi
    echo "Terminating: metrics unavailable (attempt $FAILURES/$MAX_FAILURES)" >> /proc/1/fd/1
    sleep 5
    continue
  fi

  FAILURES=0
  RUNNING=${COUNTS% *}
  WAITING=${COUNTS#* }
  if [ "$RUNNING" -eq 0 ] && [ "$WAITING" -eq 0 ]; then
    echo "Terminating: No active or waiting requests, safe to terminate" >> /proc/1/fd/1
    exit 0
  fi
  echo "Terminating: Running: $RUNNING, Waiting: $WAITING" >> /proc/1/fd/1
  sleep 5
done
1735
# Do not edit the preset argument name unless you know what you're doing.
1836
# Feel free to add more arguments to suit your requirements.
1937
recommendedConfigs:

0 commit comments

Comments
 (0)