InftyAI · InftyAI-Agent · Mar 6, 2025 · Mar 6, 2025
diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
@@ -13,7 +13,7 @@ spec:
     - -m
     - vllm.entrypoints.openai.api_server
   image: vllm/vllm-openai
-  version: v0.6.0
+  version: v0.7.3
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
   recommendedConfigs:

diff --git a/test/integration/webhook/backendruntime_test.go b/test/integration/webhook/backendruntime_test.go
@@ -88,7 +88,7 @@ var _ = ginkgo.Describe("BackendRuntime default and validation", func() {
 		ginkgo.Entry("BackendRuntime creation with no resources", &testValidatingCase{
 			creationFunc: func() *inferenceapi.BackendRuntime {
 				return wrapper.MakeBackendRuntime("vllm").
-					Image("vllm/vllm-openai").Version("v0.6.0").
+					Image("vllm/vllm-openai").Version(util.VllmImageVersion).
 					Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
 					Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
 					Obj()

diff --git a/test/util/consts.go b/test/util/consts.go
@@ -23,4 +23,6 @@ const (
 	Interval           = time.Millisecond * 250
 	E2ETimeout         = 5 * time.Minute
 	E2EInterval        = 1 * time.Second
+
+	VllmImageVersion = "v0.7.3"
 )
diff --git a/test/util/mock.go b/test/util/mock.go
@@ -54,7 +54,7 @@ func MockASampleService(ns string) *inferenceapi.Service {
 
 func MockASampleBackendRuntime() *wrapper.BackendRuntimeWrapper {
 	return wrapper.MakeBackendRuntime("vllm").
-		Image("vllm/vllm-openai").Version("v0.6.0").
+		Image("vllm/vllm-openai").Version(VllmImageVersion).
 		Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
 		Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
 		Request("default", "cpu", "4").Limit("default", "cpu", "4")