Merged

1 change: 0 additions & 1 deletion .github/ISSUE_TEMPLATE/BUG_REPORT.md
@@ -21,7 +21,6 @@ assignees: ''
 **Environment**:

 - Kubernetes version (use `kubectl version`):
-- LWS version:
 - llmaz version (use `git describe --tags --dirty --always`):
 - Cloud provider or hardware configuration:
 - OS (e.g: `cat /etc/os-release`):

2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/ENHANCEMENT.md
@@ -2,7 +2,7 @@
 name: Enhancement Request
 about: Suggest an enhancement to the llmaz project
 title: ''
-labels: kind/feature
+labels: feature
 assignees: ''

 ---

7 changes: 1 addition & 6 deletions .golangci.yaml
@@ -1,11 +1,6 @@
 run:
-  deadline: 5m
+  timeout: 5m
   allow-parallel-runners: true
-  dirs:
-    - api
-    - cmd
-    - pkg
-    - test

 issues:
   # don't skip warning about doc comments

4 changes: 4 additions & 0 deletions api/core/v1alpha1/model_types.go
@@ -18,6 +18,7 @@ package v1alpha1

 import (
     v1 "k8s.io/api/core/v1"
+    "k8s.io/apimachinery/pkg/api/resource"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )

@@ -131,6 +132,9 @@ type InferenceConfig struct {
     // +kubebuilder:validation:MaxItems=8
     // +optional
     Flavors []Flavor `json:"flavors,omitempty"`
+    // SharedMemorySize represents the size of /dev/shm required in the runtime of
+    // inference workload.
+    SharedMemorySize *resource.Quantity `json:"sharedMemorySize,omitempty"`
 }

 type ModelName string

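Note: `SharedMemorySize` is a `*resource.Quantity`, so an unset field (nil) is distinguishable from an explicit zero and simply means no `/dev/shm` override. A minimal sketch of populating the field from Go follows — the helper name and the `coreapi` import path are assumptions based on the repository layout, not part of this diff:

import (
    "k8s.io/apimachinery/pkg/api/resource"

    coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
)

// newInferenceConfig (hypothetical helper) requests a 2Gi /dev/shm.
// resource.MustParse panics on malformed input, so it suits compile-time
// literals; use resource.ParseQuantity for values supplied at runtime.
func newInferenceConfig() *coreapi.InferenceConfig {
    shm := resource.MustParse("2Gi")
    return &coreapi.InferenceConfig{SharedMemorySize: &shm}
}
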
9 changes: 9 additions & 0 deletions config/crd/bases/llmaz.io_openmodels.yaml
@@ -105,6 +105,15 @@ spec:
                       type: object
                     maxItems: 8
                     type: array
+                  sharedMemorySize:
+                    anyOf:
+                    - type: integer
+                    - type: string
+                    description: |-
+                      SharedMemorySize represents the size of /dev/shm required in the runtime of
+                      inference workload.
+                    pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
+                    x-kubernetes-int-or-string: true
                 type: object
               source:
                 description: |-

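The `anyOf` integer-or-string schema and the quantity pattern above are the standard controller-gen output for a `resource.Quantity` field. For illustration only, an abridged OpenModel manifest exercising the new field might look like this — the metadata name and flavor keys are inferred from the test wrapper changes in this PR, other required fields (such as the model source) are omitted, and the whole manifest is a hypothetical sketch rather than a documented example:

apiVersion: llmaz.io/v1alpha1
kind: OpenModel
metadata:
  name: sample-model
spec:
  inferenceConfig:
    sharedMemorySize: 1Gi
    flavors:
    - name: a100
      requests:
        nvidia.com/gpu: "1"
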
40 changes: 38 additions & 2 deletions pkg/controller/inference/playground_controller.go
@@ -265,7 +265,7 @@ func buildWorkloadTemplate(models []*coreapi.OpenModel, playground *inferenceapi

     if multiHost {
         workload.LeaderWorkerTemplate.LeaderTemplate = &template
-        workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(playground, backendRuntime)
+        workload.LeaderWorkerTemplate.WorkerTemplate = buildWorkerTemplate(models, playground, backendRuntime)
     } else {
         workload.LeaderWorkerTemplate.WorkerTemplate = template
     }
@@ -366,12 +366,30 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
         },
     }

+    // construct /dev/shm size
+    if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil {
+        template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
+            Name: "dshm",
+            VolumeSource: corev1.VolumeSource{
+                EmptyDir: &corev1.EmptyDirVolumeSource{
+                    Medium:    corev1.StorageMediumMemory,
+                    SizeLimit: models[0].Spec.InferenceConfig.SharedMemorySize,
+                },
+            },
+        })
+
+        template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
+            Name:      "dshm",
+            MountPath: "/dev/shm",
+        })
+    }
+
     return template, nil
 }

 // This is a copy of buildTemplate with some refactors, only used in multi-nodes cases.
 // Worker template has no args, no contain port.
-func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec {
+func buildWorkerTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playground, backendRuntime *inferenceapi.BackendRuntime) corev1.PodTemplateSpec {
     parser := helper.NewBackendRuntimeParser(backendRuntime)

     envs := parser.Envs()
@@ -423,6 +441,24 @@ func buildWorkerTemplate(playground *inferenceapi.Playground, backendRuntime *in
         },
     }

+    // construct /dev/shm size
+    if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil {
+        template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
+            Name: "dshm",
+            VolumeSource: corev1.VolumeSource{
+                EmptyDir: &corev1.EmptyDirVolumeSource{
+                    Medium:    corev1.StorageMediumMemory,
+                    SizeLimit: models[0].Spec.InferenceConfig.SharedMemorySize,
+                },
+            },
+        })
+
+        template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
+            Name:      "dshm",
+            MountPath: "/dev/shm",
+        })
+    }
+
     return template
 }

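The `/dev/shm` block above is duplicated verbatim between `buildTemplate` and `buildWorkerTemplate`. Because the emptyDir uses `StorageMediumMemory`, whatever the workload writes under `/dev/shm` counts against the pod's memory limit, with `SizeLimit` as the cap. A possible follow-up — not part of this PR — would be to factor the duplication into a helper; a sketch, with import paths assumed from the repository layout:

import (
    corev1 "k8s.io/api/core/v1"

    coreapi "github.com/inftyai/llmaz/api/core/v1alpha1"
)

// attachSharedMemory (hypothetical helper) mounts a memory-backed emptyDir at
// /dev/shm on the first container when the model requests an explicit
// shared-memory size; it is a no-op otherwise.
func attachSharedMemory(template *corev1.PodTemplateSpec, model *coreapi.OpenModel) {
    if model.Spec.InferenceConfig == nil || model.Spec.InferenceConfig.SharedMemorySize == nil {
        return
    }
    template.Spec.Volumes = append(template.Spec.Volumes, corev1.Volume{
        Name: "dshm",
        VolumeSource: corev1.VolumeSource{
            EmptyDir: &corev1.EmptyDirVolumeSource{
                Medium:    corev1.StorageMediumMemory,
                SizeLimit: model.Spec.InferenceConfig.SharedMemorySize,
            },
        },
    })
    template.Spec.Containers[0].VolumeMounts = append(template.Spec.Containers[0].VolumeMounts, corev1.VolumeMount{
        Name:      "dshm",
        MountPath: "/dev/shm",
    })
}
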
1 change: 1 addition & 0 deletions test/util/mock.go
@@ -37,6 +37,7 @@ func MockASampleModel() *coreapi.OpenModel {
         InferenceFlavors(
             *wrapper.MakeFlavor("a100").SetRequest("nvidia.com/gpu", "1").Obj(),
             *wrapper.MakeFlavor("a10").SetRequest("nvidia.com/gpu", "2").Obj()).
+        SharedMemorySize("1Gi").
         Obj()
 }

12 changes: 12 additions & 0 deletions test/util/validation/validate_playground.go
@@ -260,6 +260,18 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
                     return errors.New("command not right")
                 }
             }
+
+            if models[0].Spec.InferenceConfig != nil && models[0].Spec.InferenceConfig.SharedMemorySize != nil {
+                if multiHost {
+                    if *models[0].Spec.InferenceConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
+                        return fmt.Errorf("expected SharedMemorySize %s, got %s", models[0].Spec.InferenceConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.LeaderTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
+                    }
+                }
+                if *models[0].Spec.InferenceConfig.SharedMemorySize != *service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit {
+                    return fmt.Errorf("expected SharedMemorySize %s, got %s", models[0].Spec.InferenceConfig.SharedMemorySize.String(), service.Spec.WorkloadTemplate.LeaderWorkerTemplate.WorkerTemplate.Spec.Volumes[0].EmptyDir.SizeLimit.String())
+                }
+            }
+
             return nil

         }, util.IntegrationTimeout, util.Interval).Should(gomega.Succeed())
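One caveat on the checks above: they compare dereferenced `resource.Quantity` values with `!=`. That works here because both sides descend from the same parsed quantity, but two numerically equal quantities can differ as structs (`1Gi` and `1024Mi` parse to different cached string forms), so `Cmp` is the safer idiom in general. A small sketch of the alternative:

import "k8s.io/apimachinery/pkg/api/resource"

// quantitiesEqual reports whether two quantities are numerically equal,
// regardless of how they were written ("1Gi" and "1024Mi" compare equal).
func quantitiesEqual(a, b *resource.Quantity) bool {
    if a == nil || b == nil {
        return a == b // equal only if both are nil
    }
    return a.Cmp(*b) == 0
}
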
9 changes: 9 additions & 0 deletions test/util/wrapper/model.go
@@ -107,6 +107,15 @@ func (w *ModelWrapper) Label(k, v string) *ModelWrapper {
     return w
 }

+func (w *ModelWrapper) SharedMemorySize(v string) *ModelWrapper {
+    if w.Spec.InferenceConfig == nil {
+        w.Spec.InferenceConfig = &coreapi.InferenceConfig{}
+    }
+    value := resource.MustParse(v)
+    w.Spec.InferenceConfig.SharedMemorySize = &value
+    return w
+}
+
 func MakeFlavor(name string) *FlavorWrapper {
     return &FlavorWrapper{
         coreapi.Flavor{