Skip to content

Commit ffee96c

Browse files
rabenhorst and rikatz authored
Add native histogram support for histogram metrics (#9971)
Co-authored-by: Ricardo Katz <[email protected]>
1 parent 1ea376a commit ffee96c

File tree

9 files changed

+128
-38
lines changed

9 files changed

+128
-38
lines changed

cmd/dataplane/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ func main() {
6666
mc := metric.NewDummyCollector()
6767
if conf.EnableMetrics {
6868
// TODO: Ingress class is not a part of dataplane anymore
69-
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
69+
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
7070
if err != nil {
7171
klog.Fatalf("Error creating prometheus collector: %v", err)
7272
}

cmd/nginx/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ func main() {
130130

131131
mc := metric.NewDummyCollector()
132132
if conf.EnableMetrics {
133-
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
133+
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
134134
if err != nil {
135135
klog.Fatalf("Error creating prometheus collector: %v", err)
136136
}

docs/user-guide/cli-arguments.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
88
|----------|-------------|
99
| `--annotations-prefix` | Prefix of the Ingress annotations specific to the NGINX controller. (default "nginx.ingress.kubernetes.io") |
1010
| `--apiserver-host` | Address of the Kubernetes API server. Takes the form "protocol://address:port". If not specified, it is assumed the program runs inside a Kubernetes cluster and local discovery is attempted. |
11+
| `--bucket-factor` | Bucket factor for native histograms. Value must be > 1 for enabling native histograms. (default 0) |
1112
| `--certificate-authority` | Path to a cert file for the certificate authority. This certificate is used only when the flag --apiserver-host is specified. |
1213
| `--configmap` | Name of the ConfigMap containing custom global configurations for the controller. |
1314
| `--controller-class` | Ingress Class Controller value this Ingress satisfies. The class of an Ingress object is set using the field IngressClassName in Kubernetes clusters version v1.19.0 or higher. The .spec.controller value of the IngressClass referenced in an Ingress Object should be the same value specified here to make this object be watched. |
@@ -40,6 +41,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
4041
| `--internal-logger-address` | Address to be used when binding internal syslogger. (default 127.0.0.1:11514) |
4142
| `--kubeconfig` | Path to a kubeconfig file containing authorization and API server information. |
4243
| `--length-buckets` | Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength. (default `[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]`) |
44+
| `--max-buckets` | Maximum number of buckets for native histograms. (default 100) |
4345
| `--maxmind-edition-ids` | Maxmind edition ids to download GeoLite2 Databases. (default "GeoLite2-City,GeoLite2-ASN") |
4446
| `--maxmind-retries-timeout` | Maxmind downloading delay between 1st and 2nd attempt, 0s - do not retry to download if something went wrong. (default 0s) |
4547
| `--maxmind-retries-count` | Number of attempts to download the GeoIP DB. (default 1) |

go.work.sum

Lines changed: 69 additions & 2 deletions
Large diffs are not rendered by default.

internal/ingress/controller/controller.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,8 @@ type Configuration struct {
108108
EnableMetrics bool
109109
MetricsPerHost bool
110110
MetricsBuckets *collectors.HistogramBuckets
111+
MetricsBucketFactor float64
112+
MetricsMaxBuckets uint32
111113
ReportStatusClasses bool
112114
ExcludeSocketMetrics []string
113115

internal/ingress/metric/collectors/socket.go

Lines changed: 43 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ var requestTags = []string{
9999

100100
// NewSocketCollector creates a new SocketCollector instance using
101101
// the ingress watch namespace and class used by the controller
102-
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, excludeMetrics []string) (*SocketCollector, error) {
102+
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludeMetrics []string) (*SocketCollector, error) {
103103
socket := "/tmp/nginx/prometheus-nginx.socket"
104104
// unix sockets must be unlink()ed before being used
105105
//nolint:errcheck // Ignore unlink error
@@ -144,11 +144,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
144144

145145
connectTime: histogramMetric(
146146
&prometheus.HistogramOpts{
147-
Name: "connect_duration_seconds",
148-
Help: "The time spent on establishing a connection with the upstream server",
149-
Namespace: PrometheusNamespace,
150-
ConstLabels: constLabels,
151-
Buckets: buckets.TimeBuckets,
147+
Name: "connect_duration_seconds",
148+
Help: "The time spent on establishing a connection with the upstream server",
149+
Namespace: PrometheusNamespace,
150+
ConstLabels: constLabels,
151+
Buckets: buckets.TimeBuckets,
152+
NativeHistogramBucketFactor: bucketFactor,
153+
NativeHistogramMaxBucketNumber: maxBuckets,
152154
},
153155
requestTags,
154156
em,
@@ -157,23 +159,27 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
157159

158160
headerTime: histogramMetric(
159161
&prometheus.HistogramOpts{
160-
Name: "header_duration_seconds",
161-
Help: "The time spent on receiving first header from the upstream server",
162-
Namespace: PrometheusNamespace,
163-
ConstLabels: constLabels,
164-
Buckets: buckets.TimeBuckets,
162+
Name: "header_duration_seconds",
163+
Help: "The time spent on receiving first header from the upstream server",
164+
Namespace: PrometheusNamespace,
165+
ConstLabels: constLabels,
166+
Buckets: buckets.TimeBuckets,
167+
NativeHistogramBucketFactor: bucketFactor,
168+
NativeHistogramMaxBucketNumber: maxBuckets,
165169
},
166170
requestTags,
167171
em,
168172
mm,
169173
),
170174
responseTime: histogramMetric(
171175
&prometheus.HistogramOpts{
172-
Name: "response_duration_seconds",
173-
Help: "The time spent on receiving the response from the upstream server",
174-
Namespace: PrometheusNamespace,
175-
ConstLabels: constLabels,
176-
Buckets: buckets.TimeBuckets,
176+
Name: "response_duration_seconds",
177+
Help: "The time spent on receiving the response from the upstream server",
178+
Namespace: PrometheusNamespace,
179+
ConstLabels: constLabels,
180+
Buckets: buckets.TimeBuckets,
181+
NativeHistogramBucketFactor: bucketFactor,
182+
NativeHistogramMaxBucketNumber: maxBuckets,
177183
},
178184
requestTags,
179185
em,
@@ -182,11 +188,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
182188

183189
requestTime: histogramMetric(
184190
&prometheus.HistogramOpts{
185-
Name: "request_duration_seconds",
186-
Help: "The request processing time in milliseconds",
187-
Namespace: PrometheusNamespace,
188-
ConstLabels: constLabels,
189-
Buckets: buckets.TimeBuckets,
191+
Name: "request_duration_seconds",
192+
Help: "The request processing time in milliseconds",
193+
Namespace: PrometheusNamespace,
194+
ConstLabels: constLabels,
195+
Buckets: buckets.TimeBuckets,
196+
NativeHistogramBucketFactor: bucketFactor,
197+
NativeHistogramMaxBucketNumber: maxBuckets,
190198
},
191199
requestTags,
192200
em,
@@ -195,11 +203,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
195203

196204
responseLength: histogramMetric(
197205
&prometheus.HistogramOpts{
198-
Name: "response_size",
199-
Help: "The response length (including request line, header, and request body)",
200-
Namespace: PrometheusNamespace,
201-
ConstLabels: constLabels,
202-
Buckets: buckets.LengthBuckets,
206+
Name: "response_size",
207+
Help: "The response length (including request line, header, and request body)",
208+
Namespace: PrometheusNamespace,
209+
ConstLabels: constLabels,
210+
Buckets: buckets.LengthBuckets,
211+
NativeHistogramBucketFactor: bucketFactor,
212+
NativeHistogramMaxBucketNumber: maxBuckets,
203213
},
204214
requestTags,
205215
em,
@@ -208,11 +218,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat
208218

209219
requestLength: histogramMetric(
210220
&prometheus.HistogramOpts{
211-
Name: "request_size",
212-
Help: "The request length (including request line, header, and request body)",
213-
Namespace: PrometheusNamespace,
214-
ConstLabels: constLabels,
215-
Buckets: buckets.LengthBuckets,
221+
Name: "request_size",
222+
Help: "The request length (including request line, header, and request body)",
223+
Namespace: PrometheusNamespace,
224+
ConstLabels: constLabels,
225+
Buckets: buckets.LengthBuckets,
226+
NativeHistogramBucketFactor: bucketFactor,
227+
NativeHistogramMaxBucketNumber: maxBuckets,
216228
},
217229
requestTags,
218230
em,

internal/ingress/metric/collectors/socket_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ func TestCollector(t *testing.T) {
8383
prometheus.ExponentialBuckets(10, 10, 7),
8484
}
8585

86+
bucketFactor := 1.1
87+
maxBuckets := uint32(100)
88+
8689
cases := []struct {
8790
name string
8891
data []string
@@ -594,7 +597,7 @@ func TestCollector(t *testing.T) {
594597
t.Run(c.name, func(t *testing.T) {
595598
registry := prometheus.NewPedanticRegistry()
596599

597-
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, c.excludeMetrics)
600+
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, bucketFactor, maxBuckets, c.excludeMetrics)
598601
if err != nil {
599602
t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
600603
}

internal/ingress/metric/main.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ type collector struct {
7171
}
7272

7373
// NewCollector creates a new metric collector for the ingress controller
74-
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, excludedSocketMetrics []string) (Collector, error) {
74+
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludedSocketMetrics []string) (Collector, error) {
7575
podNamespace := os.Getenv("POD_NAMESPACE")
7676
if podNamespace == "" {
7777
podNamespace = "default"
@@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
8989
return nil, err
9090
}
9191

92-
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, excludedSocketMetrics)
92+
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, bucketFactor, maxBuckets, excludedSocketMetrics)
9393
if err != nil {
9494
return nil, err
9595
}

pkg/flags/flags.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,8 @@ Requires the update-status parameter.`)
183183
timeBuckets = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
184184
lengthBuckets = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
185185
sizeBuckets = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
186+
bucketFactor = flags.Float64("bucket-factor", 0, "Bucket factor for native histograms. Value must be > 1 for enabling native histograms.")
187+
maxBuckets = flags.Uint32("max-buckets", 100, "Maximum number of buckets for native histograms.")
186188
excludeSocketMetrics = flags.StringSlice("exclude-socket-metrics", []string{}, "et of socket request metrics to exclude which won't be exported nor being calculated. E.g. 'nginx_ingress_controller_success,nginx_ingress_controller_header_duration_seconds'.")
187189
monitorMaxBatchSize = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")
188190

@@ -339,6 +341,8 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g
339341
EnableMetrics: *enableMetrics,
340342
MetricsPerHost: *metricsPerHost,
341343
MetricsBuckets: histogramBuckets,
344+
MetricsBucketFactor: *bucketFactor,
345+
MetricsMaxBuckets: *maxBuckets,
342346
ReportStatusClasses: *reportStatusClasses,
343347
ExcludeSocketMetrics: *excludeSocketMetrics,
344348
MonitorMaxBatchSize: *monitorMaxBatchSize,

0 commit comments

Comments
 (0)