Skip to content

Commit 8aaf43d

Browse files
committed
cleanup: upgrade vllm version to v0.7.3
1 parent 9eba500 commit 8aaf43d

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

chart/templates/backends/vllm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ spec:
1313
- -m
1414
- vllm.entrypoints.openai.api_server
1515
image: vllm/vllm-openai
16-
version: v0.6.0
16+
version: v0.7.3
1717
# Do not edit the preset argument name unless you know what you're doing.
1818
# Free to add more arguments with your requirements.
1919
recommendedConfigs:

test/integration/webhook/backendruntime_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ var _ = ginkgo.Describe("BackendRuntime default and validation", func() {
8888
ginkgo.Entry("BackendRuntime creation with no resources", &testValidatingCase{
8989
creationFunc: func() *inferenceapi.BackendRuntime {
9090
return wrapper.MakeBackendRuntime("vllm").
91-
Image("vllm/vllm-openai").Version("v0.6.0").
91+
Image("vllm/vllm-openai").Version(util.VllmImageVersion).
9292
Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
9393
Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
9494
Obj()

test/util/mock.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ import (
2828
)
2929

3030
const (
31-
sampleModelName = "llama3-8b"
31+
sampleModelName = "llama3-8b"
32+
VllmImageVersion = "v0.7.3"
3233
)
3334

3435
func MockASampleModel() *coreapi.OpenModel {
@@ -54,7 +55,7 @@ func MockASampleService(ns string) *inferenceapi.Service {
5455

5556
func MockASampleBackendRuntime() *wrapper.BackendRuntimeWrapper {
5657
return wrapper.MakeBackendRuntime("vllm").
57-
Image("vllm/vllm-openai").Version("v0.6.0").
58+
Image("vllm/vllm-openai").Version(VllmImageVersion).
5859
Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
5960
Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
6061
Request("default", "cpu", "4").Limit("default", "cpu", "4")

0 commit comments

Comments (0)