@@ -17,13 +17,17 @@ limitations under the License.
1717package e2e
1818
1919import (
20+ "bytes"
2021 "github.com/onsi/ginkgo/v2"
2122 "github.com/onsi/gomega"
23+ "io"
2224 autoscalingv2 "k8s.io/api/autoscaling/v2"
2325 corev1 "k8s.io/api/core/v1"
2426 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2527 "k8s.io/apimachinery/pkg/types"
28+ "sigs.k8s.io/controller-runtime/pkg/client"
2629 testing "sigs.k8s.io/lws/test/testutils"
30+ "strings"
2731
2832 inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
2933 "github.com/inftyai/llmaz/test/util"
@@ -112,6 +116,66 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
112116 validation .ValidateServiceStatusEqualTo (ctx , k8sClient , service , inferenceapi .ServiceAvailable , "ServiceReady" , metav1 .ConditionTrue )
113117 validation .ValidateServicePods (ctx , k8sClient , service )
114118 })
119+ ginkgo .It ("Deploy a huggingface model with customized backendRuntime, and preStop is worked" , func () {
120+ backendRuntime := wrapper .MakeBackendRuntime ("llmaz-llamacpp" ).
121+ Image ("ghcr.io/ggerganov/llama.cpp" ).Version ("server" ).
122+ Command ([]string {"./llama-server" }).
123+ Lifecycle (& corev1.Lifecycle {
124+ PreStop : & corev1.LifecycleHandler {
125+ Exec : & corev1.ExecAction {
126+ Command : []string {"/bin/sh" , "-c" , "echo 'preStop hook executed' >> /proc/1/fd/1" },
127+ },
128+ },
129+ }).
130+ Arg ("default" , []string {"-m" , "{{.ModelPath}}" , "--host" , "0.0.0.0" , "--port" , "8080" }).
131+ Request ("default" , "cpu" , "2" ).Request ("default" , "memory" , "4Gi" ).Limit ("default" , "cpu" , "4" ).Limit ("default" , "memory" , "4Gi" ).Obj ()
132+ gomega .Expect (k8sClient .Create (ctx , backendRuntime )).To (gomega .Succeed ())
133+
134+ model := wrapper .MakeModel ("qwen2-0-5b-gguf" ).FamilyName ("qwen2" ).ModelSourceWithModelHub ("Huggingface" ).ModelSourceWithModelID ("Qwen/Qwen2-0.5B-Instruct-GGUF" , "qwen2-0_5b-instruct-q5_k_m.gguf" , "" , nil , nil ).Obj ()
135+ gomega .Expect (k8sClient .Create (ctx , model )).To (gomega .Succeed ())
136+ defer func () {
137+ gomega .Expect (k8sClient .Delete (ctx , model )).To (gomega .Succeed ())
138+ }()
139+
140+ playground := wrapper .MakePlayground ("qwen2-0-5b-gguf" , ns .Name ).ModelClaim ("qwen2-0-5b-gguf" ).BackendRuntime ("llmaz-llamacpp" ).Replicas (1 ).Obj ()
141+ gomega .Expect (k8sClient .Create (ctx , playground )).To (gomega .Succeed ())
142+ validation .ValidatePlayground (ctx , k8sClient , playground )
143+ validation .ValidatePlaygroundStatusEqualTo (ctx , k8sClient , playground , inferenceapi .PlaygroundAvailable , "PlaygroundReady" , metav1 .ConditionTrue )
144+
145+ service := & inferenceapi.Service {}
146+ gomega .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : playground .Name , Namespace : playground .Namespace }, service )).To (gomega .Succeed ())
147+ validation .ValidateService (ctx , k8sClient , service )
148+ validation .ValidateServiceStatusEqualTo (ctx , k8sClient , service , inferenceapi .ServiceAvailable , "ServiceReady" , metav1 .ConditionTrue )
149+ validation .ValidateServicePods (ctx , k8sClient , service )
150+
151+ gomega .Expect (k8sClient .Delete (ctx , playground )).To (gomega .Succeed ())
152+ podList := & corev1.PodList {}
153+ listOps := & client.ListOptions {Namespace : playground .Namespace }
154+ gomega .Eventually (func () bool {
155+ err := k8sClient .List (ctx , podList , listOps )
156+ if err != nil {
157+ return false
158+ }
159+ for _ , pod := range podList .Items {
160+ req := clientgoClient .CoreV1 ().Pods (playground .Namespace ).GetLogs (pod .Name , & corev1.PodLogOptions {})
161+ podLogs , err := req .Stream (ctx )
162+ if err != nil {
163+ return false
164+ }
165+ defer podLogs .Close ()
166+
167+ buf := new (bytes.Buffer )
168+ _ , err = io .Copy (buf , podLogs )
169+ if err != nil {
170+ return false
171+ }
172+ if strings .Contains (buf .String (), "preStop hook executed" ) {
173+ return true
174+ }
175+ }
176+ return false
177+ }, timeout , interval ).Should (gomega .BeTrue ())
178+ })
115179 ginkgo .It ("Deploy a huggingface model with llama.cpp, HPA enabled" , func () {
116180 model := wrapper .MakeModel ("qwen2-0-5b-gguf" ).FamilyName ("qwen2" ).ModelSourceWithModelHub ("Huggingface" ).ModelSourceWithModelID ("Qwen/Qwen2-0.5B-Instruct-GGUF" , "qwen2-0_5b-instruct-q5_k_m.gguf" , "" , nil , nil ).Obj ()
117181 gomega .Expect (k8sClient .Create (ctx , model )).To (gomega .Succeed ())
0 commit comments