@@ -17,12 +17,20 @@ limitations under the License.
1717package e2e
1818
1919import (
20+ "bytes"
21+ "fmt"
22+ "io"
23+ "strings"
24+
2025 "github.com/onsi/ginkgo/v2"
2126 "github.com/onsi/gomega"
27+
2228 autoscalingv2 "k8s.io/api/autoscaling/v2"
2329 corev1 "k8s.io/api/core/v1"
2430 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2531 "k8s.io/apimachinery/pkg/types"
32+
33+ "sigs.k8s.io/controller-runtime/pkg/client"
2634 testing "sigs.k8s.io/lws/test/testutils"
2735
2836 inferenceapi "github.com/inftyai/llmaz/api/inference/v1alpha1"
@@ -112,6 +120,78 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
112120 validation .ValidateServiceStatusEqualTo (ctx , k8sClient , service , inferenceapi .ServiceAvailable , "ServiceReady" , metav1 .ConditionTrue )
113121 validation .ValidateServicePods (ctx , k8sClient , service )
114122 })
123+ ginkgo .It ("Deploy a huggingface model with customized backendRuntime, and postStart is worked" , func () {
124+ backendRuntime := wrapper .MakeBackendRuntime ("llmaz-llamacpp-with-prestop" ).
125+ Image ("ghcr.io/ggerganov/llama.cpp" ).Version ("server" ).
126+ Command ([]string {"./llama-server" }).
127+ Lifecycle (& corev1.Lifecycle {
128+ PostStart : & corev1.LifecycleHandler {
129+ Exec : & corev1.ExecAction {
130+ Command : []string {"/bin/sh" , "-c" , "echo 'postStart hook executed' >> /proc/1/fd/1" },
131+ },
132+ },
133+ }).
134+ Arg ("default" , []string {"-m" , "{{.ModelPath}}" , "--host" , "0.0.0.0" , "--port" , "8080" }).
135+ Request ("default" , "cpu" , "2" ).Request ("default" , "memory" , "4Gi" ).Limit ("default" , "cpu" , "4" ).Limit ("default" , "memory" , "4Gi" ).Obj ()
136+ gomega .Expect (k8sClient .Create (ctx , backendRuntime )).To (gomega .Succeed ())
137+
138+ model := wrapper .MakeModel ("qwen2-0-5b-gguf" ).FamilyName ("qwen2" ).ModelSourceWithModelHub ("Huggingface" ).ModelSourceWithModelID ("Qwen/Qwen2-0.5B-Instruct-GGUF" , "qwen2-0_5b-instruct-q5_k_m.gguf" , "" , nil , nil ).Obj ()
139+ gomega .Expect (k8sClient .Create (ctx , model )).To (gomega .Succeed ())
140+ defer func () {
141+ gomega .Expect (k8sClient .Delete (ctx , model )).To (gomega .Succeed ())
142+ }()
143+
144+ playground := wrapper .MakePlayground ("qwen2-0-5b-gguf-1" , ns .Name ).ModelClaim ("qwen2-0-5b-gguf" ).BackendRuntime ("llmaz-llamacpp" ).Replicas (1 ).Obj ()
145+ gomega .Expect (k8sClient .Create (ctx , playground )).To (gomega .Succeed ())
146+ validation .ValidatePlayground (ctx , k8sClient , playground )
147+ validation .ValidatePlaygroundStatusEqualTo (ctx , k8sClient , playground , inferenceapi .PlaygroundAvailable , "PlaygroundReady" , metav1 .ConditionTrue )
148+
149+ service := & inferenceapi.Service {}
150+ gomega .Expect (k8sClient .Get (ctx , types.NamespacedName {Name : playground .Name , Namespace : playground .Namespace }, service )).To (gomega .Succeed ())
151+ validation .ValidateService (ctx , k8sClient , service )
152+ validation .ValidateServiceStatusEqualTo (ctx , k8sClient , service , inferenceapi .ServiceAvailable , "ServiceReady" , metav1 .ConditionTrue )
153+ validation .ValidateServicePods (ctx , k8sClient , service )
154+
155+ gomega .Expect (k8sClient .Delete (ctx , playground )).To (gomega .Succeed ())
156+ podList := & corev1.PodList {}
157+ listOps := & client.ListOptions {Namespace : playground .Namespace }
158+ gomega .Eventually (func () bool {
159+ err := k8sClient .List (ctx , podList , listOps )
160+ if err != nil {
161+ return false
162+ }
163+ for _ , pod := range podList .Items {
164+ if pod .DeletionTimestamp != nil || pod .Status .Phase != corev1 .PodRunning {
165+ fmt .Println ("pod: " , pod .Name , "status: " , pod .Status .Phase )
166+ continue
167+ }
168+ req := clientgoClient .CoreV1 ().Pods (playground .Namespace ).GetLogs (pod .Name , & corev1.PodLogOptions {})
169+ podLogs , err := req .Stream (ctx )
170+ if err != nil || podLogs == nil {
171+ fmt .Println ("req.Stream error: " , err , "podLogs: " , podLogs )
172+ continue
173+ }
174+ defer func (podLogs io.ReadCloser ) {
175+ err := podLogs .Close ()
176+ if err != nil {
177+ fmt .Println ("podLogs.Close error: " , err )
178+ }
179+ }(podLogs )
180+
181+ buf := new (bytes.Buffer )
182+ _ , err = io .Copy (buf , podLogs )
183+ if err != nil {
184+ fmt .Println ("io.Copy error: " , err )
185+ continue
186+ }
187+ fmt .Println (buf .String ())
188+ if strings .Contains (buf .String (), "postStart hook executed" ) {
189+ return true
190+ }
191+ }
192+ return false
193+ }, timeout , interval ).Should (gomega .Succeed ())
194+ })
115195 ginkgo .It ("Deploy a huggingface model with llama.cpp, HPA enabled" , func () {
116196 model := wrapper .MakeModel ("qwen2-0-5b-gguf" ).FamilyName ("qwen2" ).ModelSourceWithModelHub ("Huggingface" ).ModelSourceWithModelID ("Qwen/Qwen2-0.5B-Instruct-GGUF" , "qwen2-0_5b-instruct-q5_k_m.gguf" , "" , nil , nil ).Obj ()
117197 gomega .Expect (k8sClient .Create (ctx , model )).To (gomega .Succeed ())
0 commit comments