Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,14 @@ envtest: $(ENVTEST) ## Download envtest-setup locally if necessary.
# Install setup-envtest into $(LOCALBIN) unless a non-empty binary is already there
# (`test -s` is the guard). The module is requested at `@latest`, so the version
# that gets installed depends on when the target first runs.
$(ENVTEST): $(LOCALBIN)
test -s $(LOCALBIN)/setup-envtest || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-runtime/tools/setup-envtest@latest

# Server-side apply the kustomization in config/prometheus (ServiceMonitor,
# Prometheus instance and its RBAC) to the cluster kubectl currently points at.
.PHONY: install-prometheus
install-prometheus: ## Deploy the Prometheus resources from config/prometheus to the current cluster.
	kubectl apply --server-side -k config/prometheus

# Delete everything the config/prometheus kustomization created.
# --ignore-not-found keeps the target re-runnable: deleting resources that are
# already gone is treated as success instead of failing the make invocation.
.PHONY: uninstall-prometheus
uninstall-prometheus: ## Remove the Prometheus resources defined in config/prometheus from the current cluster.
	kubectl delete --ignore-not-found -k config/prometheus

##@Release

.PHONY: artifacts
Expand All @@ -300,7 +308,7 @@ HELMIFY ?= $(LOCALBIN)/helmify
.PHONY: helmify
helmify: $(HELMIFY) ## Download helmify locally if necessary.
# Install the pinned helmify release into $(LOCALBIN) unless a non-empty binary
# is already present (`test -s` is the guard, so an existing binary of another
# version is NOT replaced; delete it to force a re-install).
$(HELMIFY): $(LOCALBIN)
	test -s $(LOCALBIN)/helmify || GOBIN=$(LOCALBIN) go install github.com/arttor/helmify/cmd/helmify@v0.4.18

.PHONY: helm
helm: manifests kustomize helmify
Expand Down
22 changes: 22 additions & 0 deletions chart/templates/prometheus/prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{- if .Values.prometheus.enable }}
{{- /*
Prometheus instance (a Prometheus Operator custom resource) used to scrape the
llmaz controller-manager metrics. Rendered only when prometheus.enable is true.
Rendering fails early when the cluster does not serve the Prometheus kind, so
the error is reported by Helm instead of by a rejected API request.
*/}}
{{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/Prometheus") }}
{{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/Prometheus`." }}
{{- end }}
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  name: {{ include "chart.fullname" . }}-prometheus
spec:
  # Service account the Prometheus pods run as; created by serviceaccount.yaml
  # in this directory.
  serviceAccountName: {{ include "chart.fullname" . }}-prometheus
  # Selects the ServiceMonitor objects this Prometheus instance scrapes.
  serviceMonitorSelector:
    # Must match the labels set on the ServiceMonitor rendered by
    # service-monitor.yaml in this directory.
    matchLabels:
      control-plane: controller-manager
      {{- include "chart.selectorLabels" . | nindent 6 }}
  resources:
    requests:
      memory: 400Mi
  enableAdminAPI: false
{{- end }}
27 changes: 27 additions & 0 deletions chart/templates/prometheus/service-monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{{- if .Values.prometheus.enable }}
{{- /*
ServiceMonitor that tells Prometheus how to scrape the controller-manager
metrics Service. Rendered only when prometheus.enable is true, and only if the
cluster serves the ServiceMonitor kind.
*/}}
{{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
{{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
{{- end }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: {{ include "chart.fullname" . }}-controller-manager-metrics-monitor
  # These labels are what the Prometheus object's serviceMonitorSelector matches.
  labels:
    app.kubernetes.io/component: metrics
    app.kubernetes.io/created-by: llmaz
    app.kubernetes.io/part-of: llmaz
    control-plane: controller-manager
    {{- include "chart.selectorLabels" . | nindent 4 }}
spec:
  endpoints:
    # Scrape /metrics on the Service port named "https", authenticating with the
    # service account token mounted at the path below.
    - bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
      path: /metrics
      port: https
      scheme: https
      tlsConfig:
        # The serving certificate is not verified.
        insecureSkipVerify: true
  # Services to scrape. The chart selector labels must be nested inside
  # matchLabels, hence the indentation of 6.
  selector:
    matchLabels:
      control-plane: controller-manager
      {{- include "chart.selectorLabels" . | nindent 6 }}
{{- end }}
42 changes: 42 additions & 0 deletions chart/templates/prometheus/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{{- if .Values.prometheus.enable }}
{{- /*
Identity and RBAC for the Prometheus instance: a ServiceAccount, a ClusterRole
granting read access to the objects listed below, and the ClusterRoleBinding
that ties the two together. Rendered only when prometheus.enable is true.
*/}}
{{- if not (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
{{- fail "The cluster does not support the required API resource `monitoring.coreos.com/v1/ServiceMonitor`." }}
{{- end }}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "chart.fullname" . }}-prometheus
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "chart.fullname" . }}-prometheus
rules:
  # Read-only access to core objects.
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources:
      - configmaps
    verbs: ["get"]
  # Permission to GET the non-resource /metrics URL.
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "chart.fullname" . }}-prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "chart.fullname" . }}-prometheus
subjects:
  - kind: ServiceAccount
    name: {{ include "chart.fullname" . }}-prometheus
    # The ServiceAccount above has no explicit namespace, so it is created in
    # the release namespace; the subject must point at that same namespace or
    # the binding references a ServiceAccount that does not exist.
    namespace: {{ .Release.Namespace }}
{{- end }}
4 changes: 4 additions & 0 deletions chart/values.global.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,7 @@ leaderWorkerSet:
image:
repository: registry.k8s.io/lws/lws
tag: v0.5.0

# Settings for the optional Prometheus integration (chart/templates/prometheus/).
prometheus:
# -- Whether to render the Prometheus resources of this chart (Prometheus instance,
# ServiceMonitor, ServiceAccount and RBAC). When true, rendering fails unless the
# cluster serves the monitoring.coreos.com/v1 ServiceMonitor API.
enable: false
11 changes: 5 additions & 6 deletions config/default/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# This patch exposes 8443 port used by metrics service
apiVersion: apps/v1
kind: Deployment
metadata:
Expand All @@ -8,8 +7,8 @@ spec:
template:
spec:
containers:
- name: manager
ports:
- containerPort: 8443
name: metrics
protocol: TCP
- name: manager
ports:
- containerPort: 8443
name: metrics
protocol: TCP
5 changes: 5 additions & 0 deletions config/prometheus/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
# Kustomization for the Prometheus scraping stack of llmaz.
# Resource names are prefixed with "llmaz-" and namespaced resources are placed
# in the llmaz-system namespace.
namePrefix: llmaz-
namespace: llmaz-system

resources:
  - monitor.yaml
  - prometheus.yaml
  - serviceaccount.yaml
4 changes: 1 addition & 3 deletions config/prometheus/monitor.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: servicemonitor
app.kubernetes.io/instance: controller-manager-metrics-monitor
app.kubernetes.io/component: metrics
app.kubernetes.io/created-by: llmaz
app.kubernetes.io/part-of: llmaz
app.kubernetes.io/managed-by: kustomize
name: controller-manager-metrics-monitor
namespace: system
spec:
Expand All @@ -22,4 +20,4 @@ spec:
insecureSkipVerify: true
selector:
matchLabels:
control-plane: controller-manager
app.kubernetes.io/name: service
16 changes: 16 additions & 0 deletions config/prometheus/prometheus.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Prometheus instance (Prometheus Operator custom resource) for llmaz.
apiVersion: monitoring.coreos.com/v1
kind: Prometheus
metadata:
  namespace: system
  name: prometheus
spec:
  enableAdminAPI: false
  resources:
    requests:
      memory: 400Mi
  # Already carries the "llmaz-" prefix; presumably because the kustomize
  # namePrefix does not rewrite this field of a custom resource (verify).
  serviceAccountName: llmaz-prometheus
  # ServiceMonitors scraped by this instance; the label below has to be present
  # on the ServiceMonitor.
  serviceMonitorSelector:
    matchLabels:
      control-plane: controller-manager
37 changes: 37 additions & 0 deletions config/prometheus/serviceaccount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Identity used by the Prometheus pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
---
# Read-only permissions granted to that identity.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  - apiGroups:
      - ""
    resources:
      - nodes
      - nodes/metrics
      - services
      - endpoints
      - pods
    verbs:
      - get
      - list
      - watch
  - apiGroups:
      - ""
    resources:
      - configmaps
    verbs:
      - get
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
---
# Grants the ClusterRole above to the ServiceAccount in llmaz-system.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    namespace: llmaz-system
76 changes: 76 additions & 0 deletions docs/prometheus-operator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Install Prometheus Operator Guide

llmaz already exposes metrics. This document explains how to install and configure the Prometheus Operator in a Kubernetes cluster so that those metrics can be collected.

### Install the prometheus operator

Please follow the [documentation](https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/getting-started/installation.md) to install the Prometheus Operator, then check that its Pod is up:

```bash
# Installing the prometheus operator
root@VM-0-5-ubuntu:/home/ubuntu# kubectl get pods
NAME READY STATUS RESTARTS AGE
prometheus-operator-55b5c96cf8-jl2nx 1/1 Running 0 12s
```
Ensure that the Prometheus Operator Pod is running successfully.

### Install the ServiceMonitor CR for llmaz

To enable monitoring for the llmaz system, you need to install the ServiceMonitor custom resource (CR).
You can either enable the `prometheus` option of the Helm chart (see [values.global.yaml](./../../chart/values.global.yaml)) or run the `make install-prometheus` target from the Makefile.

- Using the Helm chart: set the following in `values.global.yaml`
```yaml
prometheus:
# -- Whether to enable Prometheus metrics exporting.
enable: true
```
- Using the Makefile: run `make install-prometheus`
```bash
root@VM-0-5-ubuntu:/home/ubuntu/llmaz# make install-prometheus
kubectl apply --server-side -k config/prometheus
serviceaccount/llmaz-prometheus serverside-applied
clusterrole.rbac.authorization.k8s.io/llmaz-prometheus serverside-applied
clusterrolebinding.rbac.authorization.k8s.io/llmaz-prometheus serverside-applied
prometheus.monitoring.coreos.com/llmaz-prometheus serverside-applied
servicemonitor.monitoring.coreos.com/llmaz-controller-manager-metrics-monitor serverside-applied
```

### Check Related Resources

Verify that the necessary resources have been created:

- ServiceMonitor
```bash
root@VM-0-5-ubuntu:/home/ubuntu/llmaz# kubectl get ServiceMonitor -n llmaz-system
NAME AGE
llmaz-controller-manager-metrics-monitor 59s
```
- Prometheus Pods
```bash
root@VM-0-5-ubuntu:/home/ubuntu/llmaz# kubectl get pods -n llmaz-system
NAME READY STATUS RESTARTS AGE
llmaz-controller-manager-7ff8f7d9bd-vztls 2/2 Running 0 28s
prometheus-llmaz-prometheus-0 2/2 Running 0 27s
```
- Services
```bash
root@VM-0-5-ubuntu:/home/ubuntu/llmaz# kubectl get svc -n llmaz-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
llmaz-controller-manager-metrics-service ClusterIP 10.96.79.226 <none> 8443/TCP 46s
llmaz-webhook-service ClusterIP 10.96.249.226 <none> 443/TCP 46s
prometheus-operated ClusterIP None <none> 9090/TCP 45s
```

### View metrics using the prometheus UI
Use port forwarding to access the Prometheus UI from your local machine:

```bash
root@VM-0-5-ubuntu:/home/ubuntu# kubectl port-forward services/prometheus-operated 9090:9090 --address 0.0.0.0 -n llmaz-system
Forwarding from 0.0.0.0:9090 -> 9090
```

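If you are using kind, you can forward a different local port instead, for example `kubectl port-forward services/prometheus-operated 39090:9090 --address 0.0.0.0 -n llmaz-system`
You can then open the Prometheus UI in a browser at `http://localhost:9090/query` (use `http://localhost:39090/query` if you forwarded local port 39090).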

![prometheus](prometheus.png?raw=true)
Binary file added docs/prometheus-operator/prometheus.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading