Skip to content

Commit aca4f30

Browse files
committed
feature(BackendRuntime): support lifecycle hook fields for BackendRuntime
1 parent a3a05fd commit aca4f30

File tree

8 files changed

+112
-0
lines changed

8 files changed

+112
-0
lines changed

api/inference/v1alpha1/backendruntime_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ type BackendRuntimeSpec struct {
8888
// Envs represents the environments set to the container.
8989
// +optional
9090
Envs []corev1.EnvVar `json:"envs,omitempty"`
91+
// Lifecycle represents hooks executed during the lifecycle of the container.
92+
// +optional
93+
Lifecycle *corev1.Lifecycle `json:"lifecycle,omitempty"`
9194
// Periodic probe of backend liveness.
9295
// Backend will be restarted if the probe fails.
9396
// Cannot be updated.

api/inference/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/controller/inference/playground_controller.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,9 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
313313
// commands
314314
commands := parser.Commands()
315315

316+
// lifecycle
317+
lifecycle := parser.Lifecycle()
318+
316319
// probe
317320
var livenessProbe, readinessProbe, startupProbe *corev1.Probe
318321
if backendRuntime.Spec.StartupProbe != nil {
@@ -337,6 +340,7 @@ func buildTemplate(models []*coreapi.OpenModel, playground *inferenceapi.Playgro
337340
Command: commands,
338341
Args: args,
339342
Env: envs,
343+
Lifecycle: lifecycle,
340344
Ports: []corev1.ContainerPort{
341345
{
342346
Name: "http",

pkg/controller_helper/backendruntime/backendruntime.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ func (p *BackendRuntimeParser) Envs() []corev1.EnvVar {
5656
return p.backendRuntime.Spec.Envs
5757
}
5858

59+
func (p *BackendRuntimeParser) Lifecycle() *corev1.Lifecycle {
60+
return p.backendRuntime.Spec.Lifecycle
61+
}
62+
5963
func (p *BackendRuntimeParser) Args() ([]string, error) {
6064
mainModel := p.models[0]
6165

test/e2e/playground_test.go

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,20 @@ limitations under the License.
1717
package e2e
1818

1919
import (
20+
"bytes"
21+
"fmt"
22+
"io"
23+
"strings"
24+
2025
"github.com/onsi/ginkgo/v2"
2126
"github.com/onsi/gomega"
27+
2228
autoscalingv2 "k8s.io/api/autoscaling/v2"
2329
corev1 "k8s.io/api/core/v1"
2430
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2531
"k8s.io/apimachinery/pkg/types"
32+
33+
"sigs.k8s.io/controller-runtime/pkg/client"
2634
testing "sigs.k8s.io/lws/test/testutils"
2735

2836
inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
@@ -112,6 +120,78 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
112120
validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
113121
validation.ValidateServicePods(ctx, k8sClient, service)
114122
})
123+
// Verifies that lifecycle hooks configured on a custom BackendRuntime are
// propagated to the inference Pods: the PostStart hook echoes a marker line
// into the container's stdout, which we then look for in the pod logs.
ginkgo.It("Deploy a huggingface model with customized backendRuntime, and postStart hook works", func() {
	backendRuntime := wrapper.MakeBackendRuntime("llmaz-llamacpp-with-prestop").
		Image("ghcr.io/ggerganov/llama.cpp").Version("server").
		Command([]string{"./llama-server"}).
		Lifecycle(&corev1.Lifecycle{
			PostStart: &corev1.LifecycleHandler{
				Exec: &corev1.ExecAction{
					Command: []string{"/bin/sh", "-c", "echo 'postStart hook executed' >> /proc/1/fd/1"},
				},
			},
		}).
		Arg("default", []string{"-m", "{{.ModelPath}}", "--host", "0.0.0.0", "--port", "8080"}).
		Request("default", "cpu", "2").Request("default", "memory", "4Gi").Limit("default", "cpu", "4").Limit("default", "memory", "4Gi").Obj()
	gomega.Expect(k8sClient.Create(ctx, backendRuntime)).To(gomega.Succeed())
	// Clean up the cluster-scoped backendRuntime so later specs start clean
	// (the original leaked it; only the model had a defer-delete).
	defer func() {
		gomega.Expect(k8sClient.Delete(ctx, backendRuntime)).To(gomega.Succeed())
	}()

	model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
	gomega.Expect(k8sClient.Create(ctx, model)).To(gomega.Succeed())
	defer func() {
		gomega.Expect(k8sClient.Delete(ctx, model)).To(gomega.Succeed())
	}()

	// BUG FIX: the playground must reference the custom backendRuntime created
	// above ("llmaz-llamacpp-with-prestop"), not the default "llmaz-llamacpp" —
	// otherwise the lifecycle hook under test is never applied to the pods.
	playground := wrapper.MakePlayground("qwen2-0-5b-gguf-1", ns.Name).ModelClaim("qwen2-0-5b-gguf").BackendRuntime("llmaz-llamacpp-with-prestop").Replicas(1).Obj()
	gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed())
	validation.ValidatePlayground(ctx, k8sClient, playground)
	validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundAvailable, "PlaygroundReady", metav1.ConditionTrue)

	service := &inferenceapi.Service{}
	gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{Name: playground.Name, Namespace: playground.Namespace}, service)).To(gomega.Succeed())
	validation.ValidateService(ctx, k8sClient, service)
	validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
	validation.ValidateServicePods(ctx, k8sClient, service)

	gomega.Expect(k8sClient.Delete(ctx, playground)).To(gomega.Succeed())

	// podHasMarker streams one pod's log and reports whether the postStart
	// marker line is present. Extracted into a helper so each stream is
	// closed as soon as the pod is checked, instead of deferring every Close
	// to the end of the polling closure (defer-in-loop pitfall).
	podHasMarker := func(pod *corev1.Pod) bool {
		req := clientgoClient.CoreV1().Pods(playground.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{})
		podLogs, err := req.Stream(ctx)
		if err != nil || podLogs == nil {
			fmt.Println("req.Stream error: ", err, "podLogs: ", podLogs)
			return false
		}
		defer func() {
			if err := podLogs.Close(); err != nil {
				fmt.Println("podLogs.Close error: ", err)
			}
		}()

		buf := new(bytes.Buffer)
		if _, err := io.Copy(buf, podLogs); err != nil {
			fmt.Println("io.Copy error: ", err)
			return false
		}
		fmt.Println(buf.String())
		return strings.Contains(buf.String(), "postStart hook executed")
	}

	podList := &corev1.PodList{}
	listOpts := &client.ListOptions{Namespace: playground.Namespace}
	gomega.Eventually(func() bool {
		if err := k8sClient.List(ctx, podList, listOpts); err != nil {
			return false
		}
		for i := range podList.Items {
			pod := &podList.Items[i]
			if pod.DeletionTimestamp != nil || pod.Status.Phase != corev1.PodRunning {
				fmt.Println("pod: ", pod.Name, "status: ", pod.Status.Phase)
				continue
			}
			if podHasMarker(pod) {
				return true
			}
		}
		return false
		// BUG FIX: a func() bool poll must be matched with BeTrue();
		// Succeed() only applies to error-returning actuals, so the original
		// assertion could never pass.
	}, timeout, interval).Should(gomega.BeTrue())
})
115195
ginkgo.It("Deploy a huggingface model with llama.cpp, HPA enabled", func() {
116196
model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
117197
gomega.Expect(k8sClient.Create(ctx, model)).To(gomega.Succeed())

test/e2e/suit_test.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
corev1 "k8s.io/api/core/v1"
3131
"k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/scheme"
3232
"k8s.io/apimachinery/pkg/types"
33+
"k8s.io/client-go/kubernetes"
3334
"k8s.io/client-go/rest"
3435
"sigs.k8s.io/controller-runtime/pkg/client"
3536
"sigs.k8s.io/controller-runtime/pkg/client/config"
@@ -49,6 +50,7 @@ const (
4950

5051
var cfg *rest.Config
5152
var k8sClient client.Client
53+
var clientgoClient kubernetes.Interface
5254
var ctx context.Context
5355
var cancel context.CancelFunc
5456

@@ -85,6 +87,10 @@ var _ = BeforeSuite(func() {
8587
Expect(err).NotTo(HaveOccurred())
8688
Expect(k8sClient).NotTo(BeNil())
8789

90+
clientgoClient, err := kubernetes.NewForConfig(cfg)
91+
Expect(err).NotTo(HaveOccurred())
92+
Expect(clientgoClient).NotTo(BeNil())
93+
8894
readyForTesting(k8sClient)
8995
Expect(os.Setenv("TEST_TYPE", "E2E")).Should(Succeed())
9096

test/util/validation/validate_playground.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ func ValidatePlayground(ctx context.Context, k8sClient client.Client, playground
164164
return errors.New("command not right")
165165
}
166166

167+
// compare lifecycle
168+
if diff := cmp.Diff(parser.Lifecycle(), service.Spec.WorkloadTemplate.WorkerTemplate.Spec.Containers[0].Lifecycle); diff != "" {
169+
return errors.New("lifecycle not right")
170+
}
171+
167172
// compare fields only can be configured in backend.
168173

169174
if backendRuntime.Spec.StartupProbe != nil {

test/util/wrapper/backend.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,11 @@ func (w *BackendRuntimeWrapper) Command(commands []string) *BackendRuntimeWrappe
6262
return w
6363
}
6464

65+
func (w *BackendRuntimeWrapper) Lifecycle(lifecycle *corev1.Lifecycle) *BackendRuntimeWrapper {
66+
w.Spec.Lifecycle = lifecycle
67+
return w
68+
}
69+
6570
func (w *BackendRuntimeWrapper) Arg(name string, args []string) *BackendRuntimeWrapper {
6671
if w.Spec.RecommendedConfigs == nil {
6772
w.Spec.RecommendedConfigs = []inferenceapi.RecommendedConfig{

0 commit comments

Comments
 (0)