Skip to content

Commit cc7e838

Browse files
committed
reconciler/managed: add crossplane_resource_drift_seconds metric
1 parent 0d8cbce commit cc7e838

File tree

4 files changed

+121
-1
lines changed

4 files changed

+121
-1
lines changed

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ require (
88
github.com/go-logr/logr v1.2.4
99
github.com/google/go-cmp v0.5.9
1010
github.com/hashicorp/vault/api v1.9.2
11+
github.com/prometheus/client_golang v1.15.1
1112
github.com/spf13/afero v1.9.5
1213
golang.org/x/time v0.3.0
1314
google.golang.org/grpc v1.57.0
@@ -93,7 +94,6 @@ require (
9394
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
9495
github.com/pkg/errors v0.9.1 // indirect
9596
github.com/pkg/profile v1.7.0 // indirect
96-
github.com/prometheus/client_golang v1.15.1 // indirect
9797
github.com/prometheus/client_model v0.4.0 // indirect
9898
github.com/prometheus/common v0.44.0 // indirect
9999
github.com/prometheus/procfs v0.10.0 // indirect

pkg/reconciler/managed/metrics.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
/*
2+
Copyright 2023 The Crossplane Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package managed
18+
19+
import (
20+
"context"
21+
"sync"
22+
"time"
23+
24+
"github.com/prometheus/client_golang/prometheus"
25+
"k8s.io/apimachinery/pkg/runtime/schema"
26+
"k8s.io/client-go/tools/cache"
27+
"sigs.k8s.io/controller-runtime/pkg/cluster"
28+
"sigs.k8s.io/controller-runtime/pkg/manager"
29+
"sigs.k8s.io/controller-runtime/pkg/metrics"
30+
31+
"github.com/crossplane/crossplane-runtime/pkg/resource"
32+
)
33+
34+
func init() {
35+
metrics.Registry.MustRegister(drift)
36+
}
37+
38+
var subSystem = "crossplane"
39+
40+
var (
41+
drift = prometheus.NewHistogramVec(prometheus.HistogramOpts{
42+
Subsystem: subSystem,
43+
Name: "resource_resource_drift_seconds",
44+
Help: "How long since the previous reconcile when a resource was found to be out of sync; excludes restart of the provider",
45+
Buckets: prometheus.ExponentialBuckets(10e-9, 10, 10),
46+
}, []string{"group", "kind", "retries"})
47+
)
48+
49+
// driftRecorder records the time since the last observation of a resource
50+
// and records the time since on update as a metric. This represents an upper
51+
// bound for the duration the drift existed.
52+
type driftRecorder struct {
53+
lastObservation sync.Map
54+
gvk schema.GroupVersionKind
55+
56+
cluster cluster.Cluster
57+
}
58+
59+
var _ manager.Runnable = &driftRecorder{}
60+
61+
func (r *driftRecorder) Start(ctx context.Context) error {
62+
inf, err := r.cluster.GetCache().GetInformerForKind(ctx, r.gvk)
63+
if err != nil {
64+
return err
65+
}
66+
67+
registered, err := inf.AddEventHandler(cache.ResourceEventHandlerFuncs{
68+
DeleteFunc: func(obj interface{}) {
69+
if final, ok := obj.(cache.DeletedFinalStateUnknown); ok {
70+
obj = final.Obj
71+
}
72+
managed := obj.(resource.Managed)
73+
r.lastObservation.Delete(managed.GetName())
74+
},
75+
})
76+
if err != nil {
77+
return err
78+
}
79+
defer inf.RemoveEventHandler(registered) //nolint:errcheck // this happens on destruction. We cannot do anything anyway.
80+
81+
<-ctx.Done()
82+
83+
return nil
84+
}
85+
86+
func (r *driftRecorder) recordUnchanged(name string) {
87+
r.lastObservation.Store(name, time.Now())
88+
}
89+
90+
func (r *driftRecorder) recordUpdate(name string) {
91+
last, ok := r.lastObservation.Load(name)
92+
if !ok {
93+
return
94+
}
95+
96+
drift.WithLabelValues(r.gvk.Group, r.gvk.Kind).Observe(time.Since(last.(time.Time)).Seconds())
97+
}

pkg/reconciler/managed/reconciler.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,8 @@ type Reconciler struct {
482482

483483
features feature.Flags
484484

485+
driftRecorder driftRecorder
486+
485487
// The below structs embed the set of interfaces used to implement the
486488
// managed resource reconciler. We do this primarily for readability, so
487489
// that the reconciler logic reads r.external.Connect(),
@@ -671,6 +673,7 @@ func NewReconciler(m manager.Manager, of resource.ManagedKind, o ...ReconcilerOp
671673
creationGracePeriod: defaultGracePeriod,
672674
timeout: reconcileTimeout,
673675
managed: defaultMRManaged(m),
676+
driftRecorder: driftRecorder{cluster: m},
674677
external: defaultMRExternal(),
675678
supportedManagementPolicies: defaultSupportedManagementPolicies(),
676679
log: logging.NewNopLogger(),
@@ -681,6 +684,11 @@ func NewReconciler(m manager.Manager, of resource.ManagedKind, o ...ReconcilerOp
681684
ro(r)
682685
}
683686

687+
if err := m.Add(&r.driftRecorder); err != nil {
688+
r.log.Info("unable to register drift recorder with controller manager", "error", err)
689+
// no way to recover from this
690+
}
691+
684692
return r
685693
}
686694

@@ -1079,6 +1087,13 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
10791087
// https://github.com/crossplane/crossplane/issues/289
10801088
log.Debug("External resource is up to date", "requeue-after", time.Now().Add(r.pollInterval))
10811089
managed.SetConditions(xpv1.ReconcileSuccess())
1090+
1091+
// record that we intentionally did not update the managed resource
1092+
// because no drift was detected. We call this so late in the reconcile
1093+
// because all the cases above could contribute (for different reasons)
1094+
// that the external object would not have been updated.
1095+
r.driftRecorder.recordUnchanged(managed.GetName())
1096+
10821097
return reconcile.Result{RequeueAfter: r.pollInterval}, errors.Wrap(r.client.Status().Update(ctx, managed), errUpdateManagedStatus)
10831098
}
10841099

@@ -1106,6 +1121,9 @@ func (r *Reconciler) Reconcile(ctx context.Context, req reconcile.Request) (reco
11061121
return reconcile.Result{Requeue: true}, errors.Wrap(r.client.Status().Update(ctx, managed), errUpdateManagedStatus)
11071122
}
11081123

1124+
// record the drift after the successful update.
1125+
r.driftRecorder.recordUpdate(managed.GetName())
1126+
11091127
if _, err := r.managed.PublishConnection(ctx, managed, update.ConnectionDetails); err != nil {
11101128
// If this is the first time we encounter this issue we'll be requeued
11111129
// implicitly when we update our status with the new error condition. If

pkg/resource/fake/mocks.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,11 @@ func (m *Manager) GetRESTMapper() meta.RESTMapper { return m.RESTMapper }
480480
// GetLogger returns the logger.
481481
func (m *Manager) GetLogger() logr.Logger { return m.Logger }
482482

483+
// Add adds a runnable to the manager.
484+
func (m *Manager) Add(_ manager.Runnable) error {
485+
return nil // do nothing
486+
}
487+
483488
// GV returns a mock schema.GroupVersion.
484489
var GV = schema.GroupVersion{Group: "g", Version: "v"}
485490

0 commit comments

Comments
 (0)