Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,16 @@ require (
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/moby/spdystream v0.5.0 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.2 // indirect
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
Expand Down Expand Up @@ -46,6 +48,8 @@ github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/Z
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y=
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
Expand All @@ -62,13 +66,17 @@ github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
github.com/onsi/ginkgo/v2 v2.23.0 h1:FA1xjp8ieYDzlgS5ABTpdUDB7wtngggONc8a7ku2NqQ=
github.com/onsi/ginkgo/v2 v2.23.0/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM=
github.com/onsi/gomega v1.36.2 h1:koNYke6TVk6ZmnyHrCXba/T/MoLBXFjeC1PtvYgw0A8=
Expand Down
4 changes: 3 additions & 1 deletion test/e2e/playground_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
defer func() {
gomega.Expect(k8sClient.Delete(ctx, model)).To(gomega.Succeed())
}()
playground := wrapper.MakePlayground("qwen2-0--5b", ns.Name).ModelClaim("qwen2-0--5b").BackendRuntime("llmaz-ollama").Replicas(1).Obj()
playground := wrapper.MakePlayground("qwen2-0--5b", ns.Name).ModelClaim("qwen2-0--5b").BackendRuntime("llmaz-ollama").BackendRuntimeEnv("OLLAMA_HOST", "0.0.0.0:8080").Replicas(1).Obj()
gomega.Expect(k8sClient.Create(ctx, playground)).To(gomega.Succeed())
validation.ValidatePlayground(ctx, k8sClient, playground)
validation.ValidatePlaygroundStatusEqualTo(ctx, k8sClient, playground, inferenceapi.PlaygroundAvailable, "PlaygroundReady", metav1.ConditionTrue)
Expand Down Expand Up @@ -91,6 +91,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
validation.ValidateService(ctx, k8sClient, service)
validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
validation.ValidateServicePods(ctx, k8sClient, service)
validation.ValidateServiceAvaliable(ctx, k8sClient, cfg, service, validation.CheckServiceAvaliable)
})
ginkgo.It("Deploy a huggingface model with customized backendRuntime", func() {
backendRuntime := wrapper.MakeBackendRuntime("llmaz-llamacpp").
Expand All @@ -116,6 +117,7 @@ var _ = ginkgo.Describe("playground e2e tests", func() {
validation.ValidateService(ctx, k8sClient, service)
validation.ValidateServiceStatusEqualTo(ctx, k8sClient, service, inferenceapi.ServiceAvailable, "ServiceReady", metav1.ConditionTrue)
validation.ValidateServicePods(ctx, k8sClient, service)
validation.ValidateServiceAvaliable(ctx, k8sClient, cfg, service, validation.CheckServiceAvaliable)
})
ginkgo.It("Deploy a huggingface model with llama.cpp, HPA enabled", func() {
model := wrapper.MakeModel("qwen2-0-5b-gguf").FamilyName("qwen2").ModelSourceWithModelHub("Huggingface").ModelSourceWithModelID("Qwen/Qwen2-0.5B-Instruct-GGUF", "qwen2-0_5b-instruct-q5_k_m.gguf", "", nil, nil).Obj()
Expand Down
119 changes: 119 additions & 0 deletions test/util/validation/validate_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,23 @@ import (
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/signal"
"strconv"
"strings"
"syscall"

"github.com/onsi/gomega"
corev1 "k8s.io/api/core/v1"
apimeta "k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/portforward"
"k8s.io/client-go/transport/spdy"
"sigs.k8s.io/controller-runtime/pkg/client"
lws "sigs.k8s.io/lws/api/leaderworkerset/v1"

Expand Down Expand Up @@ -233,3 +243,112 @@ func ValidateServicePods(ctx context.Context, k8sClient client.Client, service *
return nil
}).Should(gomega.Succeed())
}

type CheckServiceAvailableFunc func() error

func ValidateServiceAvaliable(ctx context.Context, k8sClient client.Client, cfg *rest.Config, service *inferenceapi.Service, check CheckServiceAvailableFunc) {
gomega.Eventually(func() error {
pods := corev1.PodList{}
podSelector := client.MatchingLabels(map[string]string{
lws.SetNameLabelKey: service.Name,
})
if err := k8sClient.List(ctx, &pods, podSelector, client.InNamespace(service.Namespace)); err != nil {
return err
}
if len(pods.Items) != int(*service.Spec.Replicas)*int(*service.Spec.WorkloadTemplate.Size) {
return fmt.Errorf("pods number not right, want: %d, got: %d", int(*service.Spec.Replicas)*int(*service.Spec.WorkloadTemplate.Size), len(pods.Items))
}

var targetPod *corev1.Pod
for i := range pods.Items {
if pods.Items[i].Status.Phase == corev1.PodRunning {
targetPod = &pods.Items[i]
break
}
}

if targetPod == nil {
return fmt.Errorf("no running pods found for service %s", service.Name)
}

portForwardK8sClient, err := kubernetes.NewForConfig(cfg)
if err != nil {
return fmt.Errorf("init port forward client failed: %w", err)
}

targetPort := targetPod.Spec.Containers[0].Ports[0].ContainerPort

stopChan, readyChan := make(chan struct{}, 1), make(chan struct{}, 1)
req := portForwardK8sClient.CoreV1().RESTClient().Post().
Resource("pods").
Namespace(service.Namespace).
Name(targetPod.Name).
SubResource("portforward")

transport, upgrader, err := spdy.RoundTripperFor(cfg)
if err != nil {
return fmt.Errorf("creating round tripper failed: %v", err)
}

dialer := spdy.NewDialer(upgrader, &http.Client{Transport: transport}, "POST", req.URL())

// create port forwarder
fw, err := portforward.New(dialer, []string{fmt.Sprintf("%d:%d", modelSource.DEFAULT_BACKEND_PORT, targetPort)}, stopChan, readyChan, os.Stdout, os.Stderr)
if err != nil {
return fmt.Errorf("creating port forwarder failed: %v", err)
}
// stop port forward when done
defer fw.Close()

signals := make(chan os.Signal, 1)
signal.Notify(signals, os.Interrupt, syscall.SIGTERM)

go func() {
<-signals
fmt.Println("Received termination signal, shutting down port forward...")
close(stopChan)
}()

// wait for port forward to be ready
go func() {
if err = fw.ForwardPorts(); err != nil {
fmt.Printf("Error forwarding ports: %v\n", err)
close(stopChan)
}
}()
<-readyChan
return check()
}).Should(gomega.Succeed())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's not use eventually here, I don't think we want to forward the port for several time, eventually is used for status check, so we can wrap the check() function with eventually. Generally it looks like:

func ValidateServiceAvaliable() {
  // port forward logic
  select {
  case <-readyChan:
  case <-time.After(TIMEOUT):
    return fmt.Errorf("port forwarding timeout")
  }

  Eventually ({check()}, TIMEOUT, INTERVAL)
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. PTAL

}

func CheckServiceAvaliable() error {
url := fmt.Sprintf("http://localhost:%d/completions", modelSource.DEFAULT_BACKEND_PORT)
reqBody := `{"prompt":"What is the capital city of China?","stream":false}`

req, err := http.NewRequest("POST", url, strings.NewReader(reqBody))
if err != nil {
return err
}
client := &http.Client{}
resp, err := client.Do(req)
if err != nil {
return err
}
defer func() {
_ = resp.Body.Close()
}()

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("error HTTP status code %d", resp.StatusCode)
}

body, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("error reading response: %v", err)
}

if !strings.Contains(strings.ToLower(string(body)), "beijing") {
return fmt.Errorf("error response body: %s", string(body))
}
return nil
}
Loading