diff --git a/pkg/metrics/client_go_adapter.go b/pkg/metrics/client_go_adapter.go index ff28998c44..e0a4f72e12 100644 --- a/pkg/metrics/client_go_adapter.go +++ b/pkg/metrics/client_go_adapter.go @@ -1,5 +1,5 @@ /* -Copyright 2018 The Kubernetes Authors. +Copyright 2025 The Kubernetes Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -18,6 +18,8 @@ package metrics import ( "context" + "net/url" + "time" "github.com/prometheus/client_golang/prometheus" clientmetrics "k8s.io/client-go/tools/metrics" @@ -28,7 +30,59 @@ import ( // from Kubernetes so that we match the core controllers. var ( - // client metrics. + // Client metrics mirroring https://github.com/kubernetes/kubernetes/blob/v1.33.0/staging/src/k8s.io/component-base/metrics/prometheus/restclient/metrics.go + // (all except the rest_client_exec_plugin_* metrics, which controllers wouldn't use). + + // requestLatency is a Prometheus Histogram metric type partitioned by the + // "verb" and "host" labels. It is used for the rest client latency metrics. + requestLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_request_duration_seconds", + Help: "Request latency in seconds. Broken down by verb, and host.", + Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0}, + }, + []string{"verb", "host"}, + ) + + // resolverLatency is a Prometheus Histogram metric type partitioned by the + // "host" label. It is used for the rest client DNS resolver latency metrics. + resolverLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_dns_resolution_duration_seconds", + Help: "DNS resolver latency in seconds. 
Broken down by host.", + Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0}, + }, + []string{"host"}, + ) + + requestSize = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_request_size_bytes", + Help: "Request size in bytes. Broken down by verb and host.", + // 64 bytes to 16MB + Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216}, + }, + []string{"verb", "host"}, + ) + + responseSize = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_response_size_bytes", + Help: "Response size in bytes. Broken down by verb and host.", + // 64 bytes to 16MB + Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216}, + }, + []string{"verb", "host"}, + ) + + rateLimiterLatency = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "rest_client_rate_limiter_duration_seconds", + Help: "Client side rate limiter latency in seconds. Broken down by verb, and host.", + Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0}, + }, + []string{"verb", "host"}, + ) requestResult = prometheus.NewCounterVec( prometheus.CounterOpts{ @@ -37,6 +91,30 @@ var ( }, []string{"code", "method", "host"}, ) + + requestRetry = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rest_client_request_retries_total", + Help: "Number of request retries, partitioned by status code, verb, and host.", + }, + []string{"code", "verb", "host"}, + ) + + transportCacheEntries = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "rest_client_transport_cache_entries", + Help: "Number of transport entries in the internal cache.", + }, + ) + + transportCacheCalls = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "rest_client_transport_create_calls_total", + Help: "Number of calls to get a new transport, partitioned by the result of the operation " + + "hit: obtained from the cache, miss: created 
and added to the cache, uncacheable: created and not cached", + }, + []string{"result"}, + ) ) func init() { @@ -46,26 +124,101 @@ func init() { // registerClientMetrics sets up the client latency metrics from client-go. func registerClientMetrics() { // register the metrics with our registry - Registry.MustRegister(requestResult) + Registry.MustRegister(requestResult, + requestLatency, + resolverLatency, + requestSize, + responseSize, + rateLimiterLatency, + requestRetry, + transportCacheEntries, + transportCacheCalls, + ) // register the metrics with client-go clientmetrics.Register(clientmetrics.RegisterOpts{ - RequestResult: &resultAdapter{metric: requestResult}, + RequestResult: &requestResultAdapter{metric: requestResult}, + RequestLatency: &requestLatencyAdapter{metric: requestLatency}, + ResolverLatency: &resolverLatencyAdapter{metric: resolverLatency}, + RequestSize: &requestSizeAdapter{metric: requestSize}, + ResponseSize: &responseSizeAdapter{metric: responseSize}, + RateLimiterLatency: &rateLimiterLatencyAdapter{metric: rateLimiterLatency}, + RequestRetry: &requestRetryAdapter{metric: requestRetry}, + TransportCacheEntries: &transportCacheEntriesAdapter{metric: transportCacheEntries}, + TransportCreateCalls: &transportCreateCallsAdapter{metric: transportCacheCalls}, }) } -// this section contains adapters, implementations, and other sundry organic, artisanally -// hand-crafted syntax trees required to convince client-go that it actually wants to let -// someone use its metrics. +// Prometheus adapters for client-go metrics hooks. 
+ +type requestResultAdapter struct { + metric *prometheus.CounterVec +} + +func (r *requestResultAdapter) Increment(_ context.Context, code, method, host string) { + r.metric.WithLabelValues(code, method, host).Inc() +} + +type requestLatencyAdapter struct { + metric *prometheus.HistogramVec +} + +func (l *requestLatencyAdapter) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) { + l.metric.WithLabelValues(verb, u.Host).Observe(latency.Seconds()) +} + +type resolverLatencyAdapter struct { + metric *prometheus.HistogramVec +} + +func (r *resolverLatencyAdapter) Observe(_ context.Context, host string, latency time.Duration) { + r.metric.WithLabelValues(host).Observe(latency.Seconds()) +} + +type requestSizeAdapter struct { + metric *prometheus.HistogramVec +} + +func (s *requestSizeAdapter) Observe(_ context.Context, verb string, host string, size float64) { + s.metric.WithLabelValues(verb, host).Observe(size) +} + +type responseSizeAdapter struct { + metric *prometheus.HistogramVec +} + +func (s *responseSizeAdapter) Observe(_ context.Context, verb string, host string, size float64) { + s.metric.WithLabelValues(verb, host).Observe(size) +} + +type rateLimiterLatencyAdapter struct { + metric *prometheus.HistogramVec +} -// Client metrics adapters (method #1 for client-go metrics), -// copied (more-or-less directly) from k8s.io/kubernetes setup code -// (which isn't anywhere in an easily-importable place). 
+func (l *rateLimiterLatencyAdapter) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) { + l.metric.WithLabelValues(verb, u.Host).Observe(latency.Seconds()) +} -type resultAdapter struct { +type requestRetryAdapter struct { metric *prometheus.CounterVec } -func (r *resultAdapter) Increment(_ context.Context, code, method, host string) { +func (r *requestRetryAdapter) IncrementRetry(_ context.Context, code string, method string, host string) { r.metric.WithLabelValues(code, method, host).Inc() } + +type transportCacheEntriesAdapter struct { + metric prometheus.Gauge +} + +func (t *transportCacheEntriesAdapter) Observe(value int) { + t.metric.Set(float64(value)) +} + +type transportCreateCallsAdapter struct { + metric *prometheus.CounterVec +} + +func (t *transportCreateCallsAdapter) Increment(result string) { + t.metric.WithLabelValues(result).Inc() +} diff --git a/pkg/metrics/client_go_adapter_suite_test.go b/pkg/metrics/client_go_adapter_suite_test.go new file mode 100644 index 0000000000..ca82d82a7b --- /dev/null +++ b/pkg/metrics/client_go_adapter_suite_test.go @@ -0,0 +1,29 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +func TestClientGoMetrics(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "client-go metric adapters Suite") +} diff --git a/pkg/metrics/client_go_adapter_test.go b/pkg/metrics/client_go_adapter_test.go new file mode 100644 index 0000000000..1483827e53 --- /dev/null +++ b/pkg/metrics/client_go_adapter_test.go @@ -0,0 +1,284 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "context" + "net/url" + "strings" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" +) + +const ( + testHost = "test-host:8080" + testVerb = "GET" + testMethod = "GET" + testCode = "200" +) + +var ( + testURL = &url.URL{Host: testHost} +) + +func setupTest(metric prometheus.Collector) prometheus.Gatherer { + reg := prometheus.NewRegistry() + reg.MustRegister(metric) + return reg +} + +var _ = Describe("client-go metrics", func() { + Describe("RequestResultAdapter", func() { + It("increments the counter correctly", func() { + metric := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "test_metric", + Help: "foo", + }, + []string{"code", "method", "host"}, + ) + reg := setupTest(metric) + adapter := &requestResultAdapter{metric: metric} + + adapter.Increment(context.TODO(), testCode, testMethod, testHost) + adapter.Increment(context.TODO(), testCode, testMethod, testHost) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric counter +test_metric{code="200",host="test-host:8080",method="GET"} 2 +`))).To(Succeed()) + }) + }) + + Describe("RequestSizeAdapter", func() { + It("records histogram observations", func() { + metric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "test_metric", + Help: "foo", + Buckets: []float64{100, 1000, 10000}, + }, + []string{"verb", "host"}, + ) + reg := setupTest(metric) + adapter := &requestSizeAdapter{metric: metric} + + adapter.Observe(context.TODO(), testVerb, testHost, 500) + adapter.Observe(context.TODO(), testVerb, testHost, 5000) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric histogram +test_metric_bucket{host="test-host:8080",verb="GET",le="100"} 0 +test_metric_bucket{host="test-host:8080",verb="GET",le="1000"} 1 +test_metric_bucket{host="test-host:8080",verb="GET",le="10000"} 2 +test_metric_bucket{host="test-host:8080",verb="GET",le="+Inf"} 2 +test_metric_sum{host="test-host:8080",verb="GET"} 5500 
+test_metric_count{host="test-host:8080",verb="GET"} 2 +`))).To(Succeed()) + }) + }) + + Describe("ResponseSizeAdapter", func() { + It("records histogram observations", func() { + metric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "test_metric", + Help: "foo", + Buckets: []float64{100, 1000, 10000}, + }, + []string{"verb", "host"}, + ) + reg := setupTest(metric) + adapter := &responseSizeAdapter{metric: metric} + + adapter.Observe(context.TODO(), testVerb, testHost, 750) + adapter.Observe(context.TODO(), testVerb, testHost, 7500) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric histogram +test_metric_bucket{host="test-host:8080",verb="GET",le="100"} 0 +test_metric_bucket{host="test-host:8080",verb="GET",le="1000"} 1 +test_metric_bucket{host="test-host:8080",verb="GET",le="10000"} 2 +test_metric_bucket{host="test-host:8080",verb="GET",le="+Inf"} 2 +test_metric_sum{host="test-host:8080",verb="GET"} 8250 +test_metric_count{host="test-host:8080",verb="GET"} 2 +`))).To(Succeed()) + }) + }) + + Describe("RateLimiterLatencyAdapter", func() { + It("records latency in histogram", func() { + metric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "test_metric", + Help: "foo", + Buckets: []float64{0.1, 0.2, 0.4}, + }, + []string{"verb", "host"}, + ) + reg := setupTest(metric) + adapter := &rateLimiterLatencyAdapter{metric: metric} + + adapter.Observe(context.TODO(), testVerb, *testURL, 300*time.Millisecond) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric histogram +test_metric_bucket{host="test-host:8080",verb="GET",le="0.1"} 0 +test_metric_bucket{host="test-host:8080",verb="GET",le="0.2"} 0 +test_metric_bucket{host="test-host:8080",verb="GET",le="0.4"} 1 +test_metric_bucket{host="test-host:8080",verb="GET",le="+Inf"} 1 +test_metric_sum{host="test-host:8080",verb="GET"} 0.3 
+test_metric_count{host="test-host:8080",verb="GET"} 1 +`))).To(Succeed()) + }) + }) + + Describe("ResolverLatencyAdapter", func() { + It("records latency in histogram", func() { + metric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "test_metric", + Help: "foo", + Buckets: []float64{0.1, 0.2, 0.4}, + }, + []string{"host"}, + ) + reg := setupTest(metric) + adapter := &resolverLatencyAdapter{metric: metric} + + adapter.Observe(context.TODO(), testHost, 120*time.Millisecond) + adapter.Observe(context.TODO(), testHost, 300*time.Millisecond) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric histogram +test_metric_bucket{host="test-host:8080",le="0.1"} 0 +test_metric_bucket{host="test-host:8080",le="0.2"} 1 +test_metric_bucket{host="test-host:8080",le="0.4"} 2 +test_metric_bucket{host="test-host:8080",le="+Inf"} 2 +test_metric_sum{host="test-host:8080"} 0.42 +test_metric_count{host="test-host:8080"} 2 +`))).To(Succeed()) + }) + }) + + Describe("RequestRetryAdapter", func() { + It("increments retry counter", func() { + metric := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "test_metric", + Help: "foo", + }, + []string{"code", "verb", "host"}, + ) + reg := setupTest(metric) + adapter := &requestRetryAdapter{metric: metric} + + adapter.IncrementRetry(context.TODO(), testCode, testVerb, testHost) + adapter.IncrementRetry(context.TODO(), testCode, testVerb, testHost) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric counter +test_metric{code="200",host="test-host:8080",verb="GET"} 2 +`))).To(Succeed()) + }) + }) + + Describe("TransportCacheEntriesAdapter", func() { + It("sets gauge value", func() { + metric := prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "test_metric", + Help: "foo", + }, + ) + reg := setupTest(metric) + adapter := &transportCacheEntriesAdapter{metric: metric} + + adapter.Observe(5) + + 
Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric gauge +test_metric 5 +`))).To(Succeed()) + }) + }) + + Describe("TransportCreateCallsAdapter", func() { + It("increments counter for results", func() { + metric := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "test_metric", + Help: "foo", + }, + []string{"result"}, + ) + reg := setupTest(metric) + adapter := &transportCreateCallsAdapter{metric: metric} + + adapter.Increment("hit") + adapter.Increment("miss") + adapter.Increment("hit") + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric counter +test_metric{result="hit"} 2 +test_metric{result="miss"} 1 +`))).To(Succeed()) + }) + }) + + Describe("RequestLatencyAdapter", func() { + It("records request latency in histogram", func() { + metric := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "test_metric", + Help: "foo", + Buckets: []float64{0.1, 0.2, 0.4}, + }, + []string{"verb", "host"}, + ) + reg := setupTest(metric) + adapter := &requestLatencyAdapter{metric: metric} + + adapter.Observe(context.TODO(), testVerb, *testURL, 300*time.Millisecond) + + Expect(testutil.GatherAndCompare(reg, strings.NewReader(` +# HELP test_metric foo +# TYPE test_metric histogram +test_metric_bucket{host="test-host:8080",verb="GET",le="0.1"} 0 +test_metric_bucket{host="test-host:8080",verb="GET",le="0.2"} 0 +test_metric_bucket{host="test-host:8080",verb="GET",le="0.4"} 1 +test_metric_bucket{host="test-host:8080",verb="GET",le="+Inf"} 1 +test_metric_sum{host="test-host:8080",verb="GET"} 0.3 +test_metric_count{host="test-host:8080",verb="GET"} 1 +`))).To(Succeed()) + }) + }) +})