diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml
index d1bddcd9..ca99bc1d 100644
--- a/chart/templates/backends/vllm.yaml
+++ b/chart/templates/backends/vllm.yaml
@@ -13,7 +13,7 @@ spec:
     - -m
     - vllm.entrypoints.openai.api_server
   image: vllm/vllm-openai
-  version: v0.6.0
+  version: v0.7.3
   # Do not edit the preset argument name unless you know what you're doing.
   # Free to add more arguments with your requirements.
   recommendedConfigs:
diff --git a/test/integration/webhook/backendruntime_test.go b/test/integration/webhook/backendruntime_test.go
index 5199535d..b3d96470 100644
--- a/test/integration/webhook/backendruntime_test.go
+++ b/test/integration/webhook/backendruntime_test.go
@@ -88,7 +88,7 @@ var _ = ginkgo.Describe("BackendRuntime default and validation", func() {
 		ginkgo.Entry("BackendRuntime creation with no resources", &testValidatingCase{
 			creationFunc: func() *inferenceapi.BackendRuntime {
 				return wrapper.MakeBackendRuntime("vllm").
-					Image("vllm/vllm-openai").Version("v0.6.0").
+					Image("vllm/vllm-openai").Version(util.VllmImageVersion).
 					Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
 					Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
 					Obj()
diff --git a/test/util/consts.go b/test/util/consts.go
index 32181b47..2ec0e0da 100644
--- a/test/util/consts.go
+++ b/test/util/consts.go
@@ -23,4 +23,6 @@ const (
 	Interval    = time.Millisecond * 250
 	E2ETimeout  = 5 * time.Minute
 	E2EInterval = 1 * time.Second
+
+	VllmImageVersion = "v0.7.3"
 )
diff --git a/test/util/mock.go b/test/util/mock.go
index e5691bb2..7f768ad0 100644
--- a/test/util/mock.go
+++ b/test/util/mock.go
@@ -54,7 +54,7 @@ func MockASampleService(ns string) *inferenceapi.Service {
 func MockASampleBackendRuntime() *wrapper.BackendRuntimeWrapper {
 	return wrapper.MakeBackendRuntime("vllm").
-		Image("vllm/vllm-openai").Version("v0.6.0").
+		Image("vllm/vllm-openai").Version(VllmImageVersion).
 		Command([]string{"python3", "-m", "vllm.entrypoints.openai.api_server"}).
 		Arg("default", []string{"--model", "{{.ModelPath}}", "--served-model-name", "{{.ModelName}}", "--host", "0.0.0.0", "--port", "8080"}).
 		Request("default", "cpu", "4").Limit("default", "cpu", "4")