diff --git a/Makefile b/Makefile index 41387839..65f28516 100644 --- a/Makefile +++ b/Makefile @@ -316,11 +316,14 @@ helm: manifests kustomize helmify .PHONY: helm-install helm-install: helm - helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml + helm upgrade --install llmaz ./chart -f ./chart/values.global.yaml --dependency-update .PHONY: helm-upgrade helm-upgrade: image-push artifacts helm-install +.PHONY: install-chatbot +install-chatbot: helm-install + .PHONY: helm-package helm-package: helm # Make sure will alwasy start with a new line. diff --git a/README.md b/README.md index 1645ae75..e2f8e439 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Easy, advanced inference platform for large language models on Kubernetes - **Various Model Providers**: llmaz supports a wide range of model providers, such as [HuggingFace](https://huggingface.co/), [ModelScope](https://www.modelscope.cn), ObjectStores. llmaz will automatically handle the model loading, requiring no effort from users. - **Multi-Host Support**: llmaz supports both single-host and multi-host scenarios with [LWS](https://github.com/kubernetes-sigs/lws) from day 0. - **Scaling Efficiency**: llmaz supports horizontal scaling with [HPA](./docs/examples/hpa/README.md) by default and will integrate with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) for smart scaling across different clouds. +- **Built-in ChatUI**: Out-of-the-box chatbot support with the integration of [Open WebUI](https://github.com/open-webui/open-webui), see configurations [here](./docs/open-webui.md). 
## Quick Start diff --git a/chart/Chart.lock b/chart/Chart.lock new file mode 100644 index 00000000..a0da65ee --- /dev/null +++ b/chart/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: open-webui + repository: https://helm.openwebui.com/ + version: 6.4.0 +digest: sha256:2520f6e26f2e6fd3e51c5f7f940eef94217c125a9828b0f59decedbecddcdb29 +generated: "2025-04-21T00:50:06.532039+08:00" diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 2aec1a89..02128132 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -19,3 +19,9 @@ version: 0.0.8 # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. appVersion: 0.1.2 + +dependencies: + - name: open-webui + version: "6.4.0" + repository: "https://helm.openwebui.com/" + condition: open-webui.enabled diff --git a/chart/templates/backends/llamacpp.yaml b/chart/templates/backends/llamacpp.yaml index eaeecd8e..150e2378 100644 --- a/chart/templates/backends/llamacpp.yaml +++ b/chart/templates/backends/llamacpp.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/ollama.yaml b/chart/templates/backends/ollama.yaml index 70b68fda..097e7ba4 100644 --- a/chart/templates/backends/ollama.yaml +++ b/chart/templates/backends/ollama.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/sglang.yaml b/chart/templates/backends/sglang.yaml index 710382c5..2c5a9238 100644 --- a/chart/templates/backends/sglang.yaml +++ b/chart/templates/backends/sglang.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git 
a/chart/templates/backends/tgi.yaml b/chart/templates/backends/tgi.yaml index dd9af4a1..693964ee 100644 --- a/chart/templates/backends/tgi.yaml +++ b/chart/templates/backends/tgi.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/backends/vllm.yaml b/chart/templates/backends/vllm.yaml index 35d257ab..a65f6a5c 100644 --- a/chart/templates/backends/vllm.yaml +++ b/chart/templates/backends/vllm.yaml @@ -1,4 +1,4 @@ -{{- if .Values.backendRuntime.enable -}} +{{- if .Values.backendRuntime.enabled -}} apiVersion: inference.llmaz.io/v1alpha1 kind: BackendRuntime metadata: diff --git a/chart/templates/lws/leaderworkerset.yaml b/chart/templates/lws/leaderworkerset.yaml index 37c11474..ff5555b9 100644 --- a/chart/templates/lws/leaderworkerset.yaml +++ b/chart/templates/lws/leaderworkerset.yaml @@ -1,4 +1,4 @@ -{{- if .Values.leaderWorkerSet.enable -}} +{{- if .Values.leaderWorkerSet.enabled -}} apiVersion: v1 kind: Namespace metadata: diff --git a/chart/templates/prometheus/prometheus.yaml b/chart/templates/prometheus/prometheus.yaml index a82bda4a..cbb44f6a 100644 --- a/chart/templates/prometheus/prometheus.yaml +++ b/chart/templates/prometheus/prometheus.yaml @@ -1,4 +1,4 @@ -{{- if .Values.prometheus.enable }} +{{- if .Values.prometheus.enabled }} {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." 
}} {{- end }} diff --git a/chart/templates/prometheus/service-monitor.yaml b/chart/templates/prometheus/service-monitor.yaml index 37fd07c0..77b9c387 100644 --- a/chart/templates/prometheus/service-monitor.yaml +++ b/chart/templates/prometheus/service-monitor.yaml @@ -1,4 +1,4 @@ -{{- if .Values.prometheus.enable }} +{{- if .Values.prometheus.enabled }} {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }} {{- end }} diff --git a/chart/templates/prometheus/serviceaccount.yaml b/chart/templates/prometheus/serviceaccount.yaml index 1d200445..0849c20f 100644 --- a/chart/templates/prometheus/serviceaccount.yaml +++ b/chart/templates/prometheus/serviceaccount.yaml @@ -1,4 +1,4 @@ -{{- if .Values.prometheus.enable }} +{{- if .Values.prometheus.enabled }} {{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} {{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }} {{- end }} diff --git a/chart/values.global.yaml b/chart/values.global.yaml index 58722e2c..8d6ed9a3 100644 --- a/chart/values.global.yaml +++ b/chart/values.global.yaml @@ -1,7 +1,7 @@ fullnameOverride: "llmaz" backendRuntime: - enable: true + enabled: true llamacpp: image: repository: ghcr.io/ggerganov/llama.cpp @@ -24,8 +24,26 @@ backendRuntime: tag: v0.7.3 leaderWorkerSet: - enable: true + enabled: true prometheus: # Prometheus is required to enable smart routing. 
- enable: true + enabled: true + +open-webui: + enabled: false + persistence: + enabled: false + enableOpenaiApi: true + openaiBaseApiUrl: "https://api.openai.com/v1" + extraEnvVars: + - name: OPENAI_API_KEY + value: "ChangeMe" + ollama: + enabled: false + pipelines: + enabled: false + tika: + enabled: false + redis-cluster: + enabled: false diff --git a/docs/examples/llamacpp/playground.yaml b/docs/examples/llamacpp/playground.yaml index 95e6524f..62cd3dc2 100644 --- a/docs/examples/llamacpp/playground.yaml +++ b/docs/examples/llamacpp/playground.yaml @@ -8,6 +8,5 @@ spec: modelName: qwen2-0--5b-gguf backendRuntimeConfig: backendName: llamacpp - configName: default args: - -fa # use flash attention diff --git a/docs/open-webui.md b/docs/open-webui.md new file mode 100644 index 00000000..638a2310 --- /dev/null +++ b/docs/open-webui.md @@ -0,0 +1,47 @@ +# Open-WebUI + +[Open WebUI](https://github.com/open-webui/open-webui) is a user-friendly AI interface with OpenAI-compatible APIs, serving as the default chatbot for llmaz. + +## Prerequisites + +- Make sure you're working in the **llmaz-system** namespace; other namespaces haven't been tested. - Make sure [EnvoyGateway](https://github.com/envoyproxy/gateway) and [Envoy AI Gateway](https://github.com/envoyproxy/ai-gateway) are installed, both of them are installed by default in llmaz. + +## How to use + +1. Enable Open WebUI in the `values.global.yaml` file, open-webui is disabled by default. + + ```yaml + open-webui: + enabled: true + ``` + + > Optionally set `persistence.enabled=true` to persist the data; recommended for production. + +2. Run `kubectl get svc -n envoy-gateway-system` to list out the services, the output looks like: + + ```cmd + envoy-default-default-envoy-ai-gateway-dbec795a LoadBalancer 10.96.145.150 80:30548/TCP 132m + envoy-gateway ClusterIP 10.96.52.76 18000/TCP,18001/TCP,18002/TCP,19001/TCP 172m + ``` + +3. 
Set `openaiBaseApiUrl` in the `values.global.yaml` like: + + ```yaml + open-webui: + enabled: true + openaiBaseApiUrl: http://envoy-default-default-envoy-ai-gateway-dbec795a.envoy-gateway-system.svc.cluster.local/v1 + ``` + +4. Run `make install-chatbot` to install the chatbot. + +5. Forward the service port: + ``` + kubectl port-forward svc/open-webui 8080:80 + ``` + +6. Visit [http://localhost:8080](http://localhost:8080) to access the Open WebUI. + +7. On your first visit, sign up to create the administrator account. + +**That's it! You can now chat with llmaz models through Open WebUI.**