diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
index b0691225..2614f6fb 100644
--- a/chart/templates/backends/vllm.yaml
+++ b/chart/templates/backends/vllm.yaml
@@ -14,6 +14,47 @@ spec:
     - vllm.entrypoints.openai.api_server
   image: vllm/vllm-openai
   version: v0.7.3
+  # Drain in-flight requests before the container is stopped: the hook blocks
+  # until vLLM reports no running or waiting requests. The pod's
+  # terminationGracePeriodSeconds remains the hard upper bound on the wait.
+  lifecycle:
+    preStop:
+      exec:
+        command:
+          - /bin/sh
+          - -c
+          - |
+            # Consecutive failed scrapes of the metrics endpoint.
+            FAILURES=0
+            while true; do
+              # Scrape once per poll so both gauges come from the same snapshot;
+              # -f and --max-time turn HTTP errors and hangs into failures.
+              if METRICS=$(curl -sf --max-time 5 http://localhost:8000/metrics); then
+                FAILURES=0
+              else
+                FAILURES=$((FAILURES + 1))
+                # A server that stays unreachable has nothing left to drain;
+                # do not hold the pod until the grace period expires.
+                if [ "$FAILURES" -ge 3 ]; then
+                  echo "Terminating: Metrics endpoint unreachable, safe to terminate" >> /proc/1/fd/1
+                  exit 0
+                fi
+                echo "Terminating: Metrics endpoint unreachable ($FAILURES/3), retrying" >> /proc/1/fd/1
+                sleep 5
+                continue
+              fi
+              # Sum every labelled series of each gauge and normalise the total, so
+              # "0", "0.0" and multi-series output are all handled. Nothing is
+              # printed when the gauge is absent, which keeps the hook waiting.
+              RUNNING=$(printf '%s\n' "$METRICS" | awk '$1 ~ /^vllm:num_requests_running/ { n += $2; seen = 1 } END { if (seen) print n + 0 }')
+              WAITING=$(printf '%s\n' "$METRICS" | awk '$1 ~ /^vllm:num_requests_waiting/ { n += $2; seen = 1 } END { if (seen) print n + 0 }')
+              if [ "$RUNNING" = "0" ] && [ "$WAITING" = "0" ]; then
+                echo "Terminating: No active or waiting requests, safe to terminate" >> /proc/1/fd/1
+                exit 0
+              fi
+              echo "Terminating: Running: $RUNNING, Waiting: $WAITING" >> /proc/1/fd/1
+              sleep 5
+            done
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
   recommendedConfigs: