Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/manifests/vllm/gpu-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ spec:
spec:
containers:
- name: vllm
image: "vllm/vllm-openai:v0.10.0"
image: "vllm/vllm-openai:v0.15.1"
imagePullPolicy: IfNotPresent
command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
args:
Expand Down
2 changes: 1 addition & 1 deletion config/manifests/vllm/sim-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ spec:
spec:
containers:
- name: vllm-sim
image: ghcr.io/llm-d/llm-d-inference-sim:v0.3.2-fix
image: ghcr.io/llm-d/llm-d-inference-sim:v0.7.1
imagePullPolicy: IfNotPresent
args:
- --model
Expand Down
43 changes: 15 additions & 28 deletions hack/release-quickstart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,17 @@ else
RELEASE_TAG="v${MAJOR}.${MINOR}.0-rc.${RC}"
fi

# The vLLM image versions
# The GPU image is from https://hub.docker.com/r/vllm/vllm-openai/tags
VLLM_GPU="${VLLM_GPU:-0.10.0}"
# The CPU image is from https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
VLLM_CPU="${VLLM_CPU:-0.10.0}"
# The sim image is from https://github.com/llm-d/llm-d-inference-sim/pkgs/container/llm-d-inference-sim
VLLM_SIM="${VLLM_SIM:-0.3.2-fix}"
VLLM_GPU_DEPLOY="config/manifests/vllm/gpu-deployment.yaml"
VLLM_CPU_DEPLOY="config/manifests/vllm/cpu-deployment.yaml"
VLLM_SIM_DEPLOY="config/manifests/vllm/sim-deployment.yaml"
VLLM_GPU_PULL_POLICY="${VLLM_GPU_PULL_POLICY:-IfNotPresent}"
VLLM_CPU_PULL_POLICY="${VLLM_CPU_PULL_POLICY:-IfNotPresent}"
VLLM_SIM_PULL_POLICY="${VLLM_SIM_PULL_POLICY:-IfNotPresent}"

echo "Using release tag: ${RELEASE_TAG}"
echo "Using vLLM GPU image version: ${VLLM_GPU}"
echo "Using vLLM CPU image version: ${VLLM_CPU}"
echo "Using vLLM Simulator image version: ${VLLM_SIM}"
echo "Using vLLM GPU image pull policy: ${VLLM_GPU_PULL_POLICY}"
echo "Using vLLM CPU image pull policy: ${VLLM_CPU_PULL_POLICY}"
echo "Using vLLM Simulator image pull policy: ${VLLM_SIM_PULL_POLICY}"

# -----------------------------------------------------------------------------
# Update version/version.go and generate CRDs with new version annotations
Expand Down Expand Up @@ -100,32 +99,20 @@ sed -i.bak -E "s|us-central1-docker\.pkg\.dev/k8s-staging-images|registry.k8s.io
# -----------------------------------------------------------------------------
# Update vLLM deployment manifests
# -----------------------------------------------------------------------------
VLLM_GPU_DEPLOY="config/manifests/vllm/gpu-deployment.yaml"
echo "Updating ${VLLM_GPU_DEPLOY} ..."

# Update the vLLM GPU image version
sed -i.bak -E "s|(vllm/vllm-openai:)[^\"[:space:]]+|\1v${VLLM_GPU}|g" "$VLLM_GPU_DEPLOY"

# Also change the imagePullPolicy from Always to IfNotPresent on lines containing the vLLM image.
sed -i.bak '/vllm\/vllm-openai/{n;s/Always/IfNotPresent/;}' "$VLLM_GPU_DEPLOY"
# Update the imagePullPolicy on the line following the vLLM GPU image.
sed -i.bak "/vllm\\/vllm-openai/{n;s|imagePullPolicy: .*|imagePullPolicy: ${VLLM_GPU_PULL_POLICY}|;}" "$VLLM_GPU_DEPLOY"

VLLM_CPU_DEPLOY="config/manifests/vllm/cpu-deployment.yaml"
echo "Updating ${VLLM_CPU_DEPLOY} ..."

# Update the vLLM CPU image version
sed -i.bak -E "s|(q9t5s3a7/vllm-cpu-release-repo:)[^\"[:space:]]+|\1v${VLLM_CPU}|g" "$VLLM_CPU_DEPLOY"

# Also change the imagePullPolicy from Always to IfNotPresent on lines containing the vLLM CPU image.
sed -i.bak '/q9t5s3a7\/vllm-cpu-release-repo/{n;s/Always/IfNotPresent/;}' "$VLLM_CPU_DEPLOY"
# Update the imagePullPolicy on the line following the vLLM CPU image.
sed -i.bak "/q9t5s3a7\\/vllm-cpu-release-repo/{n;s|imagePullPolicy: .*|imagePullPolicy: ${VLLM_CPU_PULL_POLICY}|;}" "$VLLM_CPU_DEPLOY"

VLLM_SIM_DEPLOY="config/manifests/vllm/sim-deployment.yaml"
echo "Updating ${VLLM_SIM_DEPLOY} ..."

# Update the vLLM Simulator image version
sed -i.bak -E "s|(llm-d/llm-d-inference-sim:)[^\"[:space:]]+|\1v${VLLM_SIM}|g" "$VLLM_SIM_DEPLOY"

# Also change the imagePullPolicy from Always to IfNotPresent on lines containing the vLLM image.
sed -i.bak '/llm-d\/llm-d-inference-sim/{n;s/Always/IfNotPresent/;}' "$VLLM_SIM_DEPLOY"
# Update the imagePullPolicy on the line following the vLLM Simulator image.
sed -i.bak "/llm-d\\/llm-d-inference-sim/{n;s|imagePullPolicy: .*|imagePullPolicy: ${VLLM_SIM_PULL_POLICY}|;}" "$VLLM_SIM_DEPLOY"

# Update the container tag for lora-syncer in vLLM CPU and GPU deployment manifests.
sed -i.bak -E "s|(gateway-api-inference-extension/lora-syncer:)[^\"[:space:]]+|\1${RELEASE_TAG}|g" "$VLLM_GPU_DEPLOY" "$VLLM_CPU_DEPLOY"
Expand Down