Skip to content

Commit f7ef3a8

Browse files
committed
Fixed flaky TestMultitenantAlertmanager_SyncOnRingTopologyChanges
Signed-off-by: Marco Pracucci <[email protected]>
1 parent e658571 commit f7ef3a8

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

pkg/alertmanager/multitenant.go

+10-4
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,10 @@ type MultitenantAlertmanager struct {
250250
distributor *Distributor
251251
grpcServer *server.Server
252252

253+
// Last ring state. This variable is not protected with a mutex because it's always
254+
// accessed by a single goroutine at a time.
255+
ringLastState ring.ReplicationSet
256+
253257
// Subservices manager (ring, lifecycler)
254258
subservices *services.Manager
255259
subservicesWatcher *services.FailureWatcher
@@ -488,6 +492,10 @@ func (am *MultitenantAlertmanager) starting(ctx context.Context) (err error) {
488492
}
489493

490494
if am.cfg.ShardingEnabled {
495+
// Store the ring state after the initial Alertmanager configs sync has been done and before we do change
496+
// our state in the ring.
497+
am.ringLastState, _ = am.ring.GetAllHealthy(RingOp)
498+
491499
// Make sure that all the alertmanagers we were initially configured with have
492500
// fetched state from the replicas, before advertising as ACTIVE. This will
493501
// reduce the possibility that we lose state when new instances join/leave.
@@ -631,10 +639,8 @@ func (am *MultitenantAlertmanager) run(ctx context.Context) error {
631639
defer tick.Stop()
632640

633641
var ringTickerChan <-chan time.Time
634-
var ringLastState ring.ReplicationSet
635642

636643
if am.cfg.ShardingEnabled {
637-
ringLastState, _ = am.ring.GetAllHealthy(RingOp)
638644
ringTicker := time.NewTicker(util.DurationWithJitter(am.cfg.ShardingRing.RingCheckPeriod, 0.2))
639645
defer ringTicker.Stop()
640646
ringTickerChan = ringTicker.C
@@ -656,8 +662,8 @@ func (am *MultitenantAlertmanager) run(ctx context.Context) error {
656662
// replication set which we use to compare with the previous state.
657663
currRingState, _ := am.ring.GetAllHealthy(RingOp)
658664

659-
if ring.HasReplicationSetChanged(ringLastState, currRingState) {
660-
ringLastState = currRingState
665+
if ring.HasReplicationSetChanged(am.ringLastState, currRingState) {
666+
am.ringLastState = currRingState
661667
if err := am.loadAndSyncConfigs(ctx, reasonRingChange); err != nil {
662668
level.Warn(am.logger).Log("msg", "error while synchronizing alertmanager configs", "err", err)
663669
}

pkg/alertmanager/multitenant_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -1330,7 +1330,7 @@ func TestMultitenantAlertmanager_SyncOnRingTopologyChanges(t *testing.T) {
13301330
if tt.expected {
13311331
expectedSyncs++
13321332
}
1333-
test.Poll(t, 5*time.Second, float64(expectedSyncs), func() interface{} {
1333+
test.Poll(t, 3*time.Second, float64(expectedSyncs), func() interface{} {
13341334
metrics := regs.BuildMetricFamiliesPerUser()
13351335
return metrics.GetSumOfCounters("cortex_alertmanager_sync_configs_total")
13361336
})

0 commit comments

Comments
 (0)