diff --git a/go.mod b/go.mod index bc795e80..c02da5e6 100644 --- a/go.mod +++ b/go.mod @@ -42,13 +42,16 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect github.com/google/uuid v1.6.0 // indirect + github.com/gorilla/websocket v1.5.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect github.com/mailru/easyjson v0.7.7 // indirect + github.com/moby/spdystream v0.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_golang v1.20.2 // indirect diff --git a/go.sum b/go.sum index 4d79cb28..0b0f9b04 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -46,6 +48,8 @@ github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/Z github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= +github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= @@ -62,6 +66,8 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0 github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU= +github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -69,6 +75,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= +github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= github.com/onsi/ginkgo/v2 v2.23.0 h1:FA1xjp8ieYDzlgS5ABTpdUDB7wtngggONc8a7ku2NqQ= github.com/onsi/ginkgo/v2 v2.23.0/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM= github.com/onsi/gomega v1.36.2 h1:koNYke6TVk6ZmnyHrCXba/T/MoLBXFjeC1PtvYgw0A8= diff --git a/test/e2e/playground_test.go b/test/e2e/playground_test.go index 01a35ee2..9e73f434 100644 --- a/test/e2e/playground_test.go +++ b/test/e2e/playground_test.go @@ -63,7 +63,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() { defer func() { gomega.Expect(k8sClient.Delete(ctx, model)).To(gomega.Succeed()) }() - playground := wrapper.MakePlayground("qwen2-0--5b", ns.Name).ModelClaim("qwen2-0--5b").BackendRuntime("llmaz-ollama").Replicas(1).Obj() + playground := wrapper.MakePlayground("qwen2-0--5b", ns.Name).ModelClaim("qwen2-0--5b").BackendRuntime("llmaz-ollama").BackendRuntimeEnv("OLLAMA_HOST", "0.0.0.0:8080").Replicas(1).Obj() gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed()) validation.ValidatePlayground(ctx, k8sClient, playground) validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundAvailable, "PlaygroundReady", metav1.ConditionTrue) @@ -91,6 +91,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() { validation.ValidateService(ctx, k8sClient, service) validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue) validation.ValidateServicePods(ctx, k8sClient, service) + gomega.Expect(validation.ValidateServiceAvaliable(ctx, k8sClient, cfg, service, validation.CheckServiceAvaliable)).To(gomega.Succeed()) }) ginkgo.It("Deploy a huggingface model with customized backendRuntime", func() { backendRuntime := wrapper.MakeBackendRuntime("llmaz-llamacpp"). @@ -116,6 +117,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() { validation.ValidateService(ctx, k8sClient, service) validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue) validation.ValidateServicePods(ctx, k8sClient, service) + gomega.Expect(validation.ValidateServiceAvaliable(ctx, k8sClient, cfg, service, validation.CheckServiceAvaliable)).To(gomega.Succeed()) }) ginkgo.It("Deploy a huggingface model with llama.cpp, HPA enabled", func() { model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj() diff --git a/test/util/validation/validate_service.go b/test/util/validation/validate_service.go index bf6408db..4e7b24ca 100644 --- a/test/util/validation/validate_service.go +++ b/test/util/validation/validate_service.go @@ -20,13 +20,23 @@ import ( "context" "errors" "fmt" + "io" + "net/http" + "os" + "os/signal" "strconv" + "strings" + "syscall" "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/portforward" + "k8s.io/client-go/transport/spdy" "sigs.k8s.io/controller-runtime/pkg/client" lws "sigs.k8s.io/lws/api/leaderworkerset/v1" @@ -233,3 +243,108 @@ func ValidateServicePods(ctx context.Context, k8sClient client.Client, service * return nil }).Should(gomega.Succeed()) } + +type CheckServiceAvailableFunc func() error + +func ValidateServiceAvaliable(ctx context.Context, k8sClient client.Client, cfg *rest.Config, service *inferenceapi.Service, check CheckServiceAvailableFunc) error { + pods := corev1.PodList{} + podSelector := client.MatchingLabels(map[string]string{ + lws.SetNameLabelKey: service.Name, + }) + if err := k8sClient.List(ctx, &pods, podSelector, client.InNamespace(service.Namespace)); err != nil { + return err + } + if len(pods.Items) != int(*service.Spec.Replicas)*int(*service.Spec.WorkloadTemplate.Size) { + return fmt.Errorf("pods number not right, want: %d, got: %d", int(*service.Spec.Replicas)*int(*service.Spec.WorkloadTemplate.Size), len(pods.Items)) + } + + var targetPod *corev1.Pod + for i := range pods.Items { + if pods.Items[i].Status.Phase == corev1.PodRunning { + targetPod = &pods.Items[i] + break + } + } + + if targetPod == nil { + return fmt.Errorf("no running pods found for service %s", service.Name) + } + + portForwardK8sClient, err := kubernetes.NewForConfig(cfg) + if err != nil { + return fmt.Errorf("init port forward client failed: %w", err) + } + + targetPort := targetPod.Spec.Containers[0].Ports[0].ContainerPort + stopChan, readyChan := make(chan struct{}, 1), make(chan struct{}, 1) + req := portForwardK8sClient.CoreV1().RESTClient().Post(). + Resource("pods"). + Namespace(service.Namespace). + Name(targetPod.Name). + SubResource("portforward") + + transport, upgrader, err := spdy.RoundTripperFor(cfg) + if err != nil { + return fmt.Errorf("creating round tripper failed: %v", err) + } + + dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", req.URL()) + // create port forwarder + fw, err := portforward.New(dialer, []string{fmt.Sprintf("%d:%d", modelSource.DEFAULT_BACKEND_PORT, targetPort)}, stopChan, readyChan, os.Stdout, os.Stderr) + if err != nil { + return fmt.Errorf("creating port forwarder failed: %v", err) + } + // stop port forward when done + defer fw.Close() + signals := make(chan os.Signal, 1) + signal.Notify(signals, os.Interrupt, syscall.SIGTERM) + + go func() { + <-signals + fmt.Println("Received termination signal, shutting down port forward...") + close(stopChan) + }() + + // wait for port forward to be ready + go func() { + if err = fw.ForwardPorts(); err != nil { + fmt.Printf("Error forwarding ports: %v\n", err) + close(stopChan) + } + }() + <-readyChan + gomega.Eventually(check()).Should(gomega.Succeed()) + return nil +} + +func CheckServiceAvaliable() error { + url := fmt.Sprintf("http://localhost:%d/completions", modelSource.DEFAULT_BACKEND_PORT) + reqBody := `{"prompt":"What is the capital city of China?","stream":false}` + + req, err := http.NewRequest("POST", url, strings.NewReader(reqBody)) + if err != nil { + return err + } + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + return err + } + defer func() { + _ = resp.Body.Close() + }() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("error HTTP status code %d", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("error reading response: %v", err) + } + + if !strings.Contains(strings.ToLower(string(body)), "beijing") { + return fmt.Errorf("error response body: %s", string(body)) + } + return nil +}