Skip to content

Commit 17f8448

Browse files
authored
Use exponential backoffs on secret source errors. (#732)
Previously, the back off duration was based on a fixed duration + some jitter. This PR introduces exponential back offs for all secret syncing controllers. The back off will be calculated and honored whenever an error is encountered while fetching from a secret source e.g: Vault, HCPVS. - add new metric runtime configuration gauge - unify sync reasons between VDS and VPS controllers
1 parent 0565d6e commit 17f8448

14 files changed

+438
-36
lines changed

chart/templates/_helpers.tpl

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,27 @@ globalTransformationOptions configures the manager's --global-transformation-opt
175175
{{- $opts | join "," -}}
176176
{{- end -}}
177177
{{- end -}}
178+
179+
180+
{{/*
backOffOnSecretSourceError provides the back-off options for the manager when a
secret source error occurs. Each option is emitted only when its value is set,
and nothing is rendered when no option is set, so that an empty list ("[]") is
never written into the container args.
*/}}
{{- define "vso.backOffOnSecretSourceError" -}}
{{- $opts := list -}}
{{- with .Values.controller.manager.backOffOnSecretSourceError -}}
{{- with .initialInterval -}}
{{- $opts = mustAppend $opts (printf "--back-off-initial-interval=%s" .) -}}
{{- end -}}
{{- with .maxInterval -}}
{{- $opts = mustAppend $opts (printf "--back-off-max-interval=%s" .) -}}
{{- end -}}
{{- with .multiplier -}}
{{- $opts = mustAppend $opts (printf "--back-off-multiplier=%.2f" (. | float64)) -}}
{{- end -}}
{{- with .randomizationFactor -}}
{{- $opts = mustAppend $opts (printf "--back-off-randomization-factor=%.2f" (. | float64)) -}}
{{- end -}}
{{- end -}}
{{- /* An empty list is falsy, so this block is skipped when no option was collected. */ -}}
{{- with $opts -}}
{{- . | toYaml | nindent 8 -}}
{{- end -}}
{{- end -}}

chart/templates/deployment.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ spec:
7878
{{- if $opts }}
7979
- --global-transformation-options={{ $opts }}
8080
{{- end }}
81+
{{- with include "vso.backOffOnSecretSourceError" . }}
82+
{{- . -}}
83+
{{- end }}
8184
{{- if .Values.controller.manager.extraArgs }}
8285
{{- toYaml .Values.controller.manager.extraArgs | nindent 8 }}
8386
{{- end }}

chart/values.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,22 @@ controller:
113113
# in the destination K8s Secret.
114114
excludeRaw: false
115115

116+
# Backoff settings for the controller manager. These settings control the backoff behavior
117+
# when the controller encounters an error while fetching secrets from the SecretSource.
118+
backOffOnSecretSourceError:
119+
# Initial interval between retries.
120+
# @type: duration
121+
initialInterval: "5s"
122+
# Maximum interval between retries.
123+
# @type: duration
124+
maxInterval: "60s"
125+
# Randomization factor to add jitter to the interval between retries.
126+
# @type: float
127+
randomizationFactor: 0.5
128+
# Sets the multiplier for increasing the interval between retries.
129+
# @type: float
130+
multiplier: 1.5
131+
116132
# Configures the client cache which is used by the controller to cache (and potentially persist) vault tokens that
117133
# are the result of using the VaultAuthMethod. This enables re-use of Vault Tokens
118134
# throughout their TTLs as well as the ability to renew.

controllers/hcpvaultsecretsapp_controller.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ type HCPVaultSecretsAppReconciler struct {
5454
MinRefreshAfter time.Duration
5555
referenceCache ResourceReferenceCache
5656
GlobalTransformationOption *helpers.GlobalTransformationOption
57+
BackOffRegistry *BackOffRegistry
5758
}
5859

5960
//+kubebuilder:rbac:groups=secrets.hashicorp.com,resources=hcpvaultsecretsapps,verbs=get;list;watch;create;update;patch;delete
@@ -126,9 +127,12 @@ func (r *HCPVaultSecretsAppReconciler) Reconcile(ctx context.Context, req ctrl.R
126127
resp, err := c.OpenAppSecrets(params, nil)
127128
if err != nil {
128129
logger.Error(err, "Get App Secret", "appName", o.Spec.AppName)
130+
entry, _ := r.BackOffRegistry.Get(req.NamespacedName)
129131
return ctrl.Result{
130-
RequeueAfter: computeHorizonWithJitter(requeueDurationOnError),
132+
RequeueAfter: entry.NextBackOff(),
131133
}, nil
134+
} else {
135+
r.BackOffRegistry.Delete(req.NamespacedName)
132136
}
133137

134138
r.referenceCache.Set(SecretTransformation, req.NamespacedName,
@@ -211,6 +215,10 @@ func (r *HCPVaultSecretsAppReconciler) updateStatus(ctx context.Context, o *secr
211215
// SetupWithManager sets up the controller with the Manager.
212216
func (r *HCPVaultSecretsAppReconciler) SetupWithManager(mgr ctrl.Manager, opts controller.Options) error {
213217
r.referenceCache = newResourceReferenceCache()
218+
if r.BackOffRegistry == nil {
219+
r.BackOffRegistry = NewBackOffRegistry()
220+
}
221+
214222
return ctrl.NewControllerManagedBy(mgr).
215223
For(&secretsv1beta1.HCPVaultSecretsApp{}).
216224
WithEventFilter(syncableSecretPredicate(nil)).
@@ -273,7 +281,9 @@ func (r *HCPVaultSecretsAppReconciler) hvsClient(ctx context.Context, o *secrets
273281

274282
func (r *HCPVaultSecretsAppReconciler) handleDeletion(ctx context.Context, o client.Object) error {
275283
logger := log.FromContext(ctx)
276-
r.referenceCache.Remove(SecretTransformation, client.ObjectKeyFromObject(o))
284+
objKey := client.ObjectKeyFromObject(o)
285+
r.referenceCache.Remove(SecretTransformation, objKey)
286+
r.BackOffRegistry.Delete(objKey)
277287
if controllerutil.ContainsFinalizer(o, hcpVaultSecretsAppFinalizer) {
278288
logger.Info("Removing finalizer")
279289
if controllerutil.RemoveFinalizer(o, hcpVaultSecretsAppFinalizer) {

controllers/registry.go

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ package controllers
55

66
import (
77
"sync"
8+
"time"
89

10+
"github.com/cenkalti/backoff/v4"
911
"sigs.k8s.io/controller-runtime/pkg/client"
1012
)
1113

@@ -227,3 +229,69 @@ func (r *SyncRegistry) ObjectKeys() []client.ObjectKey {
227229

228230
return result
229231
}
232+
233+
// BackOffRegistry is a registry that stores sync backoff for a client.Object.
234+
type BackOffRegistry struct {
235+
m map[client.ObjectKey]*BackOff
236+
mu sync.RWMutex
237+
opts []backoff.ExponentialBackOffOpts
238+
}
239+
240+
// Delete removes objKey from the set of registered objects and reports whether it was present.
241+
func (r *BackOffRegistry) Delete(objKey client.ObjectKey) bool {
242+
r.mu.Lock()
243+
defer r.mu.Unlock()
244+
245+
_, ok := r.m[objKey]
246+
delete(r.m, objKey)
247+
return ok
248+
}
249+
250+
// Get is a getter/setter that returns the BackOff for objKey.
251+
// If objKey is not in the set of registered objects, it will be added. Return
252+
// true if the sync backoff entry was created.
253+
func (r *BackOffRegistry) Get(objKey client.ObjectKey) (*BackOff, bool) {
254+
r.mu.RLock()
255+
defer r.mu.RUnlock()
256+
257+
entry, ok := r.m[objKey]
258+
if !ok {
259+
entry = &BackOff{
260+
bo: backoff.NewExponentialBackOff(r.opts...),
261+
}
262+
r.m[objKey] = entry
263+
}
264+
265+
return entry, !ok
266+
}
267+
268+
// BackOff is a wrapper around backoff.BackOff that does not implement
269+
// BackOff.Reset, since elements in BackOffRegistry are meant to be ephemeral.
270+
type BackOff struct {
271+
bo backoff.BackOff
272+
}
273+
274+
// NextBackOff returns the next backoff duration.
275+
func (s *BackOff) NextBackOff() time.Duration {
276+
return s.bo.NextBackOff()
277+
}
278+
279+
// DefaultExponentialBackOffOpts returns the default exponential back-off options used by NewBackOffRegistry when none are provided.
280+
func DefaultExponentialBackOffOpts() []backoff.ExponentialBackOffOpts {
281+
return []backoff.ExponentialBackOffOpts{
282+
backoff.WithInitialInterval(requeueDurationOnError),
283+
backoff.WithMaxInterval(time.Second * 60),
284+
}
285+
}
286+
287+
// NewBackOffRegistry returns a BackOffRegistry.
288+
func NewBackOffRegistry(opts ...backoff.ExponentialBackOffOpts) *BackOffRegistry {
289+
if len(opts) == 0 {
290+
opts = DefaultExponentialBackOffOpts()
291+
}
292+
293+
return &BackOffRegistry{
294+
m: map[client.ObjectKey]*BackOff{},
295+
opts: opts,
296+
}
297+
}

controllers/registry_test.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ package controllers
66
import (
77
"sync"
88
"testing"
9+
"time"
910

11+
"github.com/cenkalti/backoff/v4"
1012
"github.com/stretchr/testify/assert"
1113
"github.com/stretchr/testify/require"
1214
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -503,3 +505,118 @@ func TestSyncRegistry(t *testing.T) {
503505
})
504506
}
505507
}
508+
509+
func TestBackOffRegistry_Get(t *testing.T) {
510+
t.Parallel()
511+
512+
tests := []struct {
513+
name string
514+
m map[client.ObjectKey]*BackOff
515+
opts []backoff.ExponentialBackOffOpts
516+
objKey client.ObjectKey
517+
want *BackOff
518+
want1 bool
519+
}{
520+
{
521+
name: "new",
522+
m: map[client.ObjectKey]*BackOff{},
523+
objKey: client.ObjectKey{
524+
Namespace: "foo",
525+
Name: "bar",
526+
},
527+
want: &BackOff{
528+
bo: backoff.NewExponentialBackOff(
529+
DefaultExponentialBackOffOpts()...,
530+
),
531+
},
532+
want1: true,
533+
},
534+
{
535+
name: "previous",
536+
m: map[client.ObjectKey]*BackOff{
537+
{
538+
Namespace: "foo",
539+
Name: "bar",
540+
}: {
541+
bo: backoff.NewExponentialBackOff(
542+
DefaultExponentialBackOffOpts()...,
543+
),
544+
},
545+
},
546+
objKey: client.ObjectKey{
547+
Namespace: "foo",
548+
Name: "bar",
549+
},
550+
want: &BackOff{
551+
bo: backoff.NewExponentialBackOff(
552+
DefaultExponentialBackOffOpts()...,
553+
),
554+
},
555+
want1: false,
556+
},
557+
}
558+
for _, tt := range tests {
559+
t.Run(tt.name, func(t *testing.T) {
560+
r := &BackOffRegistry{
561+
m: tt.m,
562+
opts: tt.opts,
563+
}
564+
got, got1 := r.Get(tt.objKey)
565+
assert.NotNilf(t, got, "Get(%v)", tt.objKey)
566+
assert.Equalf(t, tt.want1, got1, "Get(%v)", tt.objKey)
567+
last := got.bo.NextBackOff()
568+
assert.Greaterf(t, last, time.Duration(0), "Get(%v)", tt.objKey)
569+
assert.Greaterf(t, got.bo.NextBackOff(), last, "Get(%v)", tt.objKey)
570+
})
571+
}
572+
}
573+
574+
func TestBackOffRegistry_Delete(t *testing.T) {
575+
t.Parallel()
576+
577+
tests := []struct {
578+
name string
579+
m map[client.ObjectKey]*BackOff
580+
opts []backoff.ExponentialBackOffOpts
581+
objKey client.ObjectKey
582+
want bool
583+
}{
584+
{
585+
name: "not-found",
586+
m: map[client.ObjectKey]*BackOff{},
587+
objKey: client.ObjectKey{
588+
Namespace: "foo",
589+
Name: "bar",
590+
},
591+
want: false,
592+
},
593+
{
594+
name: "deleted",
595+
m: map[client.ObjectKey]*BackOff{
596+
{
597+
Namespace: "foo",
598+
Name: "bar",
599+
}: {
600+
bo: backoff.NewExponentialBackOff(
601+
DefaultExponentialBackOffOpts()...,
602+
),
603+
},
604+
},
605+
objKey: client.ObjectKey{
606+
Namespace: "foo",
607+
Name: "bar",
608+
},
609+
want: true,
610+
},
611+
}
612+
for _, tt := range tests {
613+
t.Run(tt.name, func(t *testing.T) {
614+
r := &BackOffRegistry{
615+
m: tt.m,
616+
opts: tt.opts,
617+
}
618+
got := r.Delete(tt.objKey)
619+
assert.Equalf(t, tt.want, got, "Delete(%v)", tt.objKey)
620+
})
621+
}
622+
}

0 commit comments

Comments
 (0)