feature(BackendRuntime): support lifeCycle hook fields for BackendRuntime

googs1025 · googs1025 · commit e91df2eb8f65 · 2025-03-10T22:06:15.000+08:00
diff --git a/api/inference/v1alpha1/backendruntime_types.go b/api/inference/v1alpha1/backendruntime_types.go
@@ -88,6 +88,9 @@ type BackendRuntimeSpec struct {
 	// Envs represents the environments set to the container.
 	// +optional
 	Envs []corev1.EnvVar `json:"envs,omitempty"`
+	// Lifecycle represents hooks executed during the lifecycle of the container.
+	// +optional
+	Lifecycle *corev1.Lifecycle `json:"lifecycle,omitempty"`
 	// Periodic probe of backend liveness.
 	// Backend will be restarted if the probe fails.
 	// Cannot be updated.
diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go
diff --git a/pkg/controller/inference/playground_controller.go b/pkg/controller/inference/playground_controller.go
@@ -313,6 +313,9 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 	// commands
 	commands := parser.Commands()
 
+	// lifecycle
+	lifecycle := parser.Lifecycle()
+
 	// probe
 	var livenessProbe, readinessProbe, startupProbe *corev1.Probe
 	if backendRuntime.Spec.StartupProbe != nil {
@@ -337,6 +340,7 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
 					Command:   commands,
 					Args:      args,
 					Env:       envs,
+					Lifecycle: lifecycle,
 					Ports: []corev1.ContainerPort{
 						{
 							Name:          "http",
diff --git a/pkg/controller_helper/backendruntime/backendruntime.go b/pkg/controller_helper/backendruntime/backendruntime.go
@@ -56,6 +56,10 @@ func (p *BackendRuntimeParser) Envs() []corev1.EnvVar {
 	return p.backendRuntime.Spec.Envs
 }
 
+// Lifecycle returns the container lifecycle hooks declared in the
+// BackendRuntime spec. The spec field is an optional pointer, so the
+// result may be nil when no hooks are configured.
+func (p *BackendRuntimeParser) Lifecycle() *corev1.Lifecycle {
+	return p.backendRuntime.Spec.Lifecycle
+}
+
 func (p *BackendRuntimeParser) Args() ([]string, error) {
 	mainModel := p.models[0]
 
diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go
@@ -17,13 +17,17 @@ limitations under the License.
 package e2e
 
 import (
+	"bytes"
 	"github.com/onsi/ginkgo/v2"
 	"github.com/onsi/gomega"
+	"io"
 	autoscalingv2 "k8s.io/api/autoscaling/v2"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	testing "sigs.k8s.io/lws/test/testutils"
+	"strings"
 
 	inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
 	"github.com/inftyai/llmaz/test/util"
@@ -112,6 +116,66 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
 		validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
 		validation.ValidateServicePods(ctx, k8sClient, service)
 	})
+	ginkgo.It("Deploy a huggingface model with customized backendRuntime, and preStop is worked", func() {
+		// The preStop hook appends a marker line to PID 1's stdout so that it
+		// ends up in the container logs, where the test can look for it.
+		backendRuntime := wrapper.MakeBackendRuntime("llmaz-llamacpp").
+			Image("ghcr.io/ggerganov/llama.cpp").Version("server").
+			Command([]string{"./llama-server"}).
+			Lifecycle(&corev1.Lifecycle{
+				PreStop: &corev1.LifecycleHandler{
+					Exec: &corev1.ExecAction{
+						Command: []string{"/bin/sh", "-c", "echo 'preStop hook executed' >> /proc/1/fd/1"},
+					},
+				},
+			}).
+			Arg("default", []string{"-m", "{{.ModelPath}}", "--host", "0.0.0.0", "--port", "8080"}).
+			Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj()
+		gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed())
+
+		model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
+		gomega.Expect(k8sClient.Create(ctx, model)).To(gomega.Succeed())
+		defer func() {
+			gomega.Expect(k8sClient.Delete(ctx, model)).To(gomega.Succeed())
+		}()
+
+		playground := wrapper.MakePlayground("qwen2-0-5b-gguf", ns.Name).ModelClaim("qwen2-0-5b-gguf").BackendRuntime("llmaz-llamacpp").Replicas(1).Obj()
+		gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed())
+		validation.ValidatePlayground(ctx, k8sClient, playground)
+		validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundAvailable, "PlaygroundReady", metav1.ConditionTrue)
+
+		service := &inferenceapi.Service{}
+		gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, service)).To(gomega.Succeed())
+		validation.ValidateService(ctx, k8sClient, service)
+		validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
+		validation.ValidateServicePods(ctx, k8sClient, service)
+
+		// Deleting the playground terminates its pods, which triggers preStop.
+		gomega.Expect(k8sClient.Delete(ctx, playground)).To(gomega.Succeed())
+
+		// hookLogged reports whether the named pod's logs contain the preStop
+		// marker. It is a separate function so the deferred Close runs as soon
+		// as this pod has been checked instead of piling up inside the loop.
+		hookLogged := func(podName string) bool {
+			req := clientgoClient.CoreV1().Pods(playground.Namespace).GetLogs(podName, &corev1.PodLogOptions{})
+			podLogs, err := req.Stream(ctx)
+			if err != nil {
+				return false
+			}
+			defer podLogs.Close()
+
+			buf := new(bytes.Buffer)
+			if _, err := io.Copy(buf, podLogs); err != nil {
+				return false
+			}
+			return strings.Contains(buf.String(), "preStop hook executed")
+		}
+
+		listOps := &client.ListOptions{Namespace: playground.Namespace}
+		gomega.Eventually(func() bool {
+			podList := &corev1.PodList{}
+			if err := k8sClient.List(ctx, podList, listOps); err != nil {
+				return false
+			}
+			// A pod whose logs cannot be read is skipped rather than failing
+			// the whole poll, so the remaining pods are still inspected.
+			for _, pod := range podList.Items {
+				if hookLogged(pod.Name) {
+					return true
+				}
+			}
+			return false
+		}, timeout, interval).Should(gomega.BeTrue())
+	})
 	ginkgo.It("Deploy a huggingface model with llama.cpp, HPA enabled", func() {
 		model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
 		gomega.Expect(k8sClient.Create(ctx, model)).To(gomega.Succeed())
diff --git a/test/e2e/suit_test.go b/test/e2e/suit_test.go
@@ -30,6 +30,7 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/config"
@@ -49,6 +50,7 @@ const (
 
 var cfg *rest.Config
 var k8sClient client.Client
+var clientgoClient kubernetes.Interface
 var ctx context.Context
 var cancel context.CancelFunc
 
@@ -85,6 +87,10 @@ var _ = BeforeSuite(func() {
 	Expect(err).NotTo(HaveOccurred())
 	Expect(k8sClient).NotTo(BeNil())
 
+	// Use plain assignment: `:=` here would declare a new local clientgoClient
+	// and leave the package-level client used by the e2e tests nil.
+	clientgoClient, err = kubernetes.NewForConfig(cfg)
+	Expect(err).NotTo(HaveOccurred())
+	Expect(clientgoClient).NotTo(BeNil())
+
 	readyForTesting(k8sClient)
 	Expect(os.Setenv("TEST_TYPE", "E2E")).Should(Succeed())
 
diff --git a/test/util/validation/validate_playground.go b/test/util/validation/validate_playground.go
@@ -164,6 +164,11 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
 			return errors.New("command not right")
 		}
 
+		// compare lifecycle
+		if diff := cmp.Diff(parser.Lifecycle(), service.Spec.WorkloadTemplate.WorkerTemplate.Spec.Containers[0].Lifecycle); diff != "" {
+			return errors.New("lifecycle not right")
+		}
+
 		// compare fields only can be configured in backend.
 
 		if backendRuntime.Spec.StartupProbe != nil {
diff --git a/test/util/wrapper/backend.go b/test/util/wrapper/backend.go
@@ -62,6 +62,11 @@ func (w *BackendRuntimeWrapper) Command(commands []string) *BackendRuntimeWrappe
 	return w
 }
 
+// Lifecycle sets the container lifecycle hooks on the wrapped BackendRuntime
+// spec and returns the wrapper so calls can be chained.
+func (w *BackendRuntimeWrapper) Lifecycle(lifecycle *corev1.Lifecycle) *BackendRuntimeWrapper {
+	w.Spec.Lifecycle = lifecycle
+	return w
+}
+
 func (w *BackendRuntimeWrapper) Arg(name string, args []string) *BackendRuntimeWrapper {
 	if w.Spec.RecommendedConfigs == nil {
 		w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{

Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,10 @@ func (p *BackendRuntimeParser) Envs() []corev1.EnvVar {`
`56`	`56`	`return p.backendRuntime.Spec.Envs`
`57`	`57`	`}`
`58`	`58`
	`59`	`+func (p *BackendRuntimeParser) Lifecycle() *corev1.Lifecycle {`
	`60`	`+ return p.backendRuntime.Spec.Lifecycle`
	`61`	`+}`
	`62`	`+`
`59`	`63`	`func (p *BackendRuntimeParser) Args() ([]string, error) {`
`60`	`64`	`mainModel := p.models[0]`
`61`	`65`