From a8f8412bb8f92cef6f94162a66830080c87cbda0 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Sat, 15 Feb 2025 23:49:11 +0800 Subject: [PATCH 1/2] Support shared memory size Signed-off-by: kerthcet --- .github/ISSUE_TEMPLATE/BUG_REPORT.md | 1 - .github/ISSUE_TEMPLATE/ENHANCEMENT.md | 2 +- api/core/v1alpha1/model_types.go | 4 ++ config/crd/bases/llmaz.io_openmodels.yaml | 9 +++++ .../inference/playground_controller.go | 40 ++++++++++++++++++- test/util/mock.go | 1 + test/util/validation/validate_playground.go | 12 ++++++ test/util/wrapper/model.go | 9 +++++ 8 files changed, 74 insertions(+), 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/BUG_REPORT.md b/.github/ISSUE_TEMPLATE/BUG_REPORT.md index 6e4bfe31..c98695b0 100644 --- a/.github/ISSUE_TEMPLATE/BUG_REPORT.md +++ b/.github/ISSUE_TEMPLATE/BUG_REPORT.md @@ -21,7 +21,6 @@ assignees: '' **Environment**: - Kubernetes version (use `kubectl version`): -- LWS version: - llmaz version (use `git describe --tags --dirty --always`): - Cloud provider or hardware configuration: - OS (e.g: `cat /etc/os-release`): diff --git a/.github/ISSUE_TEMPLATE/ENHANCEMENT.md b/.github/ISSUE_TEMPLATE/ENHANCEMENT.md index 29d31222..2f83fe80 100644 --- a/.github/ISSUE_TEMPLATE/ENHANCEMENT.md +++ b/.github/ISSUE_TEMPLATE/ENHANCEMENT.md @@ -2,7 +2,7 @@ name: Enhancement Request about: Suggest an enhancement to the llmaz project title: '' -labels: kind/feature +labels: feature assignees: '' --- diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go index d3ac4840..8fe6ae93 100644 --- a/api/core/v1alpha1/model_types.go +++ b/api/core/v1alpha1/model_types.go @@ -18,6 +18,7 @@ package v1alpha1 import ( v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -131,6 +132,9 @@ type InferenceConfig struct { // +kubebuilder:validation:MaxItems=8 // +optional Flavors []Flavor `json:"flavors,omitempty"` + // SharedMemorySize represents the size of /dev/shm required in the runtime of + // inference workload. + SharedMemorySize *resource.Quantity `json:"sharedMemorySize,omitempty"` } type ModelName string diff --git a/config/crd/bases/llmaz.io_openmodels.yaml b/config/crd/bases/llmaz.io_openmodels.yaml index 61c561f2..d72a2f8c 100644 --- a/config/crd/bases/llmaz.io_openmodels.yaml +++ b/config/crd/bases/llmaz.io_openmodels.yaml @@ -105,6 +105,15 @@ spec: type: object maxItems: 8 type: array + sharedMemorySize: + anyOf: + - type: integer + - type: string + description: |- + SharedMemorySize represents the size of /dev/shm required in the runtime of + inference workload. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true type: object source: description: |- diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go index 61de4295..41a9dc68 100644 --- a/pkg/controller/inference/playground_controller.go +++ b/pkg/controller/inference/playground_controller.go @@ -265,7 +265,7 @@ func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi if multiHost { workload.LeaderWorkerTemplate.LeaderTemplate = &template - workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(playground, backendRuntime) + workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(models, playground, backendRuntime) } else { workload.LeaderWorkerTemplate.WorkerTemplate = template } @@ -366,12 +366,30 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro }, } + // construct /dev/shm size + if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil { + template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{ + Name: "dshm", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + Medium: corev1.StorageMediumMemory, + SizeLimit: models[0].Spec.InferenceConfig.SharedMemorySize, + }, + }, + }) + + template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + Name: "dshm", + MountPath: "/dev/shm", + }) + } + return template, nil } // This is a copy of buildTemplate with some refactors, only used in multi-nodes cases. // Worker template has no args, no contain port. -func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec { +func buildWorkerTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec { parser := helper.NewBackendRuntimeParser(backendRuntime) envs := parser.Envs() @@ -423,6 +441,24 @@ func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *in }, } + // construct /dev/shm size + if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil { + template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{ + Name: "dshm", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{ + Medium: corev1.StorageMediumMemory, + SizeLimit: models[0].Spec.InferenceConfig.SharedMemorySize, + }, + }, + }) + + template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{ + Name: "dshm", + MountPath: "/dev/shm", + }) + } + return template } diff --git a/test/util/mock.go b/test/util/mock.go index 9a19c161..54f8e10b 100644 --- a/test/util/mock.go +++ b/test/util/mock.go @@ -37,6 +37,7 @@ func MockASampleModel() *coreapi.OpenModel { InferenceFlavors( *wrapper.MakeFlavor("a100").SetRequest("nvidia.com/gpu", "1").Obj(), *wrapper.MakeFlavor("a10").SetRequest("nvidia.com/gpu", "2").Obj()). + SharedMemorySize("1Gi"). Obj() } diff --git a/test/util/validation/validate_playground.go b/test/util/validation/validate_playground.go index 397087a4..2d448e6a 100644 --- a/test/util/validation/validate_playground.go +++ b/test/util/validation/validate_playground.go @@ -260,6 +260,18 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground return errors.New("command not right") } } + + if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil { + if multiHost { + if *models[0].Spec.InferenceConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit { + return fmt.Errorf("expected SharedMemorySize %s, got %s", models[0].Spec.InferenceConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String()) + } + } + if *models[0].Spec.InferenceConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit { + return fmt.Errorf("expected SharedMemorySize %s, got %s", models[0].Spec.InferenceConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String()) + } + } + return nil }, util.IntegrationTimeout, util.Interval).Should(gomega.Succeed()) diff --git a/test/util/wrapper/model.go b/test/util/wrapper/model.go index 481c6859..11e930b8 100644 --- a/test/util/wrapper/model.go +++ b/test/util/wrapper/model.go @@ -107,6 +107,15 @@ func (w *ModelWrapper) Label(k, v string) *ModelWrapper { return w } +func (w *ModelWrapper) SharedMemorySize(v string) *ModelWrapper { + if w.Spec.InferenceConfig == nil { + w.Spec.InferenceConfig = &coreapi.InferenceConfig{} + } + value := resource.MustParse(v) + w.Spec.InferenceConfig.SharedMemorySize = &value + return w +} + func MakeFlavor(name string) *FlavorWrapper { return &FlavorWrapper{ coreapi.Flavor{ From 4634e232ac941e04fd8a9c06be2bcd8c099eb8c5 Mon Sep 17 00:00:00 2001 From: kerthcet Date: Sun, 16 Feb 2025 00:53:03 +0800 Subject: [PATCH 2/2] fix golangci Signed-off-by: kerthcet --- .golangci.yaml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.golangci.yaml b/.golangci.yaml index 9a2138e5..2c5d7267 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,11 +1,6 @@ run: - deadline: 5m + timeout: 5m allow-parallel-runners: true - dirs: - - api - - cmd - - pkg - - test issues: # don't skip warning about doc comments