@@ -12,54 +12,75 @@ metadata:
     app.kubernetes.io/name: prometheus-adapter
 data:
   config.yaml: |
-    rules:
-    {{- if .Values.tgi.horizontalPodAutoscaler.enabled }}
-    # check metric with:
-    # kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/{{ include "tgi.metricPrefix" .Subcharts.tgi }}_request_latency | jq
-    #
-    - seriesQuery: '{__name__="tgi_request_inference_duration_sum",service="{{ include "tgi.fullname" .Subcharts.tgi }}"}'
-      # Average request latency from TGI histograms, over 1 min
-      # (0.001 divider add is to make sure there's always a valid value)
-      metricsQuery: 'rate(tgi_request_inference_duration_sum{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(tgi_request_inference_duration_count{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>}[1m]))'
-      name:
-        matches: ^tgi_request_inference_duration_sum
-        as: "{{ include "tgi.metricPrefix" .Subcharts.tgi }}_request_latency"
-      resources:
-        # HPA needs both namespace + suitable object resource for its query paths:
-        # /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/tgi_request_latency
-        # (pod is not suitable object type for matching as each instance has different name)
-        overrides:
-          namespace:
-            resource: namespace
-          service:
-            resource: service
-    {{- end }}
-    {{- if .Values.teirerank.horizontalPodAutoscaler.enabled }}
-    - seriesQuery: '{__name__="te_request_inference_duration_sum",service="{{ include "teirerank.fullname" .Subcharts.teirerank }}"}'
-      # Average request latency from TEI histograms, over 1 min
-      metricsQuery: 'rate(te_request_inference_duration_sum{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>}[1m]))'
-      name:
-        matches: ^te_request_inference_duration_sum
-        as: "{{ include "teirerank.metricPrefix" .Subcharts.teirerank }}_request_latency"
-      resources:
-        overrides:
-          namespace:
-            resource: namespace
-          service:
-            resource: service
-    {{- end }}
-    {{- if .Values.tei.horizontalPodAutoscaler.enabled }}
-    - seriesQuery: '{__name__="te_request_inference_duration_sum",service="{{ include "tei.fullname" .Subcharts.tei }}"}'
-      # Average request latency from TEI histograms, over 1 min
-      metricsQuery: 'rate(te_request_inference_duration_sum{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>}[1m]))'
-      name:
-        matches: ^te_request_inference_duration_sum
-        as: "{{ include "tei.metricPrefix" .Subcharts.tei }}_request_latency"
-      resources:
-        overrides:
-          namespace:
-            resource: namespace
-          service:
-            resource: service
-    {{- end }}
+    rules:
+    {{- if .Values.tgi.horizontalPodAutoscaler.enabled }}
+    # check metric with:
+    # kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/<metric> | jq
+    #
+    {{- if .Values.tgi.accelDevice }}
+    - seriesQuery: '{__name__="tgi_queue_size",service="{{ include "tgi.fullname" .Subcharts.tgi }}"}'
+      # TGI instances queue_size sum
+      metricsQuery: 'sum by (namespace,service) (tgi_queue_size{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>})'
+      name:
+        matches: ^tgi_queue_size
+        as: "{{ include "tgi.metricPrefix" .Subcharts.tgi }}_queue_size_sum"
+    {{- else }}
+    - seriesQuery: '{__name__="tgi_request_inference_duration_sum",service="{{ include "tgi.fullname" .Subcharts.tgi }}"}'
+      # Average request latency from TGI histograms, over 1 min
+      # (0.001 divider add is to make sure there's always a valid value)
+      metricsQuery: 'rate(tgi_request_inference_duration_sum{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(tgi_request_inference_duration_count{service="{{ include "tgi.fullname" .Subcharts.tgi }}",<<.LabelMatchers>>}[1m]))'
+      name:
+        matches: ^tgi_request_inference_duration_sum
+        as: "{{ include "tgi.metricPrefix" .Subcharts.tgi }}_request_latency"
+    {{- end }}
+      resources:
+        # HPA needs both namespace + suitable object resource for its query paths:
+        # /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/<metric>
+        # (pod is not suitable object type for matching as each instance has different name)
+        overrides:
+          namespace: {resource: "namespace"}
+          service: {resource: "service"}
+    {{- end }}
+    {{- if .Values.teirerank.horizontalPodAutoscaler.enabled }}
+    {{- if .Values.teirerank.accelDevice }}
+    - seriesQuery: '{__name__="te_queue_size",service="{{ include "teirerank.fullname" .Subcharts.teirerank }}"}'
+      # TEI instances queue_size sum
+      metricsQuery: 'sum by (namespace,service) (te_queue_size{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>})'
+      name:
+        matches: ^te_queue_size
+        as: "{{ include "teirerank.metricPrefix" .Subcharts.teirerank }}_queue_size_sum"
+    {{- else }}
+    - seriesQuery: '{__name__="te_request_inference_duration_sum",service="{{ include "teirerank.fullname" .Subcharts.teirerank }}"}'
+      # Average request latency from TEI histograms, over 1 min
+      metricsQuery: 'rate(te_request_inference_duration_sum{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="{{ include "teirerank.fullname" .Subcharts.teirerank }}",<<.LabelMatchers>>}[1m]))'
+      name:
+        matches: ^te_request_inference_duration_sum
+        as: "{{ include "teirerank.metricPrefix" .Subcharts.teirerank }}_request_latency"
+    {{- end }}
+      resources:
+        overrides:
+          namespace: {resource: "namespace"}
+          service: {resource: "service"}
+    {{- end }}
+    {{- if .Values.tei.horizontalPodAutoscaler.enabled }}
+    {{- if .Values.tei.accelDevice }}
+    - seriesQuery: '{__name__="te_queue_size",service="{{ include "tei.fullname" .Subcharts.tei }}"}'
+      # TEI instances queue_size sum
+      metricsQuery: 'sum by (namespace,service) (te_queue_size{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>})'
+      name:
+        matches: ^te_queue_size
+        as: "{{ include "tei.metricPrefix" .Subcharts.tei }}_queue_size_sum"
+    {{- else }}
+    - seriesQuery: '{__name__="te_request_inference_duration_sum",service="{{ include "tei.fullname" .Subcharts.tei }}"}'
+      # Average request latency from TEI histograms, over 1 min
+      metricsQuery: 'rate(te_request_inference_duration_sum{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>}[1m]) / (0.001+rate(te_request_inference_duration_count{service="{{ include "tei.fullname" .Subcharts.tei }}",<<.LabelMatchers>>}[1m]))'
+      name:
+        matches: ^te_request_inference_duration_sum
+        as: "{{ include "tei.metricPrefix" .Subcharts.tei }}_request_latency"
+    {{- end }}
+      resources:
+        overrides:
+          namespace: {resource: "namespace"}
+          service: {resource: "service"}
+    {{- end }}
 {{- end }}
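
Once prometheus-adapter picks up the updated ConfigMap (it may need a pod restart, since the adapter reads its rules at startup), each exposed metric can be verified through the custom metrics API as the in-file comments note, e.g. `kubectl get --raw /apis/custom.metrics.k8s.io/v1beta1/namespaces/default/service/*/<metric> | jq`, where `<metric>` is the `as:` name built from the chart's `metricPrefix` helper.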
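For context on how these rules get consumed, an HPA can target the per-service value through an Object metric. Below is a minimal sketch, assuming a `chatqna` metric prefix and a TGI Deployment and Service both named `tgi` in the `default` namespace; none of these names or thresholds come from this diff:

```yaml
# Illustrative only: metric name, target value, and object names are
# assumptions, not values taken from this chart.
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: tgi
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: tgi
  minReplicas: 1
  maxReplicas: 4
  metrics:
  - type: Object
    object:
      metric:
        # queue_size sum on accelerated devices, request latency otherwise
        name: chatqna_tgi_queue_size_sum
      describedObject:
        apiVersion: v1
        kind: Service
        name: tgi
      target:
        type: Value
        value: "10"
```

The `resources.overrides` mapping in the adapter rules is what lets it resolve this namespace + service query path, per the comment above about pods being unsuitable match objects (each instance has a unique name).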