Skip to content

Commit ab22fa5

Browse files
author
Marcus Cobden
committed
Add PREPARING_TO_LEAVE state to ingester handover
1 parent 44235a3 commit ab22fa5

File tree

4 files changed

+28
-15
lines changed

4 files changed

+28
-15
lines changed

pkg/ingester/ingester_lifecycle.go

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,9 +198,7 @@ loop:
198198
break loop
199199
}
200200
}
201-
202-
// Mark ourselved as Leaving so no more samples are send to us.
203-
i.changeState(ring.LEAVING)
201+
i.changeState(ring.PREPARING_TO_LEAVE)
204202

205203
// Do the transferring / flushing on a background goroutine so we can continue
206204
// to heartbeat to consul.
@@ -318,14 +316,16 @@ func (i *Ingester) updateConsul() error {
318316
}
319317

320318
// changeState updates consul with state transitions for us. NB this must be
321-
// called from loop()! Use ChangeState for calls from outside of loop().
319+
// called from loop()!
320+
// Use ChangeState for calls from outside of loop() (unless the loop has shut down)
322321
func (i *Ingester) changeState(state ring.IngesterState) error {
323322
// Only the following state transitions can be triggered externally
324323
if !((i.state == ring.PENDING && state == ring.JOINING) || // triggered by TransferChunks at the beginning
325324
(i.state == ring.JOINING && state == ring.PENDING) || // triggered by TransferChunks on failure
326325
(i.state == ring.JOINING && state == ring.ACTIVE) || // triggered by TransferChunks on success
327326
(i.state == ring.PENDING && state == ring.ACTIVE) || // triggered by autoJoin
328-
(i.state == ring.ACTIVE && state == ring.LEAVING)) { // triggered by shutdown
327+
(i.state == ring.ACTIVE && state == ring.PREPARING_TO_LEAVE) || // triggered by shutdown
328+
(i.state == ring.PREPARING_TO_LEAVE && state == ring.LEAVING)) { // triggered by shutdown
329329
return fmt.Errorf("Changing ingester state from %v -> %v is disallowed", i.state, state)
330330
}
331331

@@ -343,6 +343,10 @@ func (i *Ingester) processShutdown() {
343343
flushRequired = false
344344
}
345345
}
346+
if i.state != ring.LEAVING {
347+
// Mark ourselved as Leaving so no more samples are send to us.
348+
i.changeState(ring.LEAVING)
349+
}
346350

347351
if flushRequired {
348352
i.flushAllChunks()
@@ -411,6 +415,12 @@ func (i *Ingester) transferChunks() error {
411415
}
412416

413417
sentChunks.Add(float64(len(chunks)))
418+
if i.state != ring.LEAVING {
419+
// Mark ourselved as Leaving so no more samples are send to us.
420+
// We wait until we have sent the first item through the stream, so that the remote
421+
// side has a chance to mark all the tokens for transfer.
422+
i.changeState(ring.LEAVING)
423+
}
414424
}
415425
}
416426

pkg/ring/replication_strategy.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ func (r *Ring) replicationStrategy(ingesters []*IngesterDesc, op Operation) (
4848

4949
// IsHealthy checks whether an ingester appears to be alive and heartbeating
5050
func (r *Ring) IsHealthy(ingester *IngesterDesc, op Operation) bool {
51-
if op == Write && ingester.State != ACTIVE {
51+
if op == Write && (ingester.State != ACTIVE && ingester.State != PREPARING_TO_LEAVE) {
5252
return false
5353
} else if op == Read && ingester.State == JOINING {
5454
return false

pkg/ring/ring.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -213,20 +213,22 @@ func (r *Ring) getInternal(key uint32, op Operation) (ReplicationSet, error) {
213213
distinctHosts[token.Ingester] = struct{}{}
214214
ingester := r.ringDesc.Ingesters[token.Ingester]
215215

216-
// TODO update comment
217-
// We do not want to Write to Ingesters that are not ACTIVE, but we do want
218-
// to write the extra replica somewhere. So we increase the size of the set
219-
// of replicas for the key. This means we have to also increase the
220-
// size of the replica set for read, but we can read from Leaving ingesters,
221-
// so don't skip it in this case.
216+
// We want to only Write to Ingesters that are ACTIVE (or PREPARING_TO_LEAVE), however
217+
// we still write to the same number of replicas. So we increase the size of the set of
218+
// replicas for the key.
219+
// To match this, we have to also increase the size of the replica set for read, but we
220+
// can still read from ingesters which are PREPARING_TO_LEAVE or LEAVING so don't skip it
221+
// in this case.
222222
// NB dead ingester will be filtered later (by replication_strategy.go).
223-
if op == Write && ingester.State != ACTIVE {
224-
if nextIngester := r.ringDesc.Ingesters[token.NextIngester]; token.NextIngester != "" && ingester.State == JOINING {
223+
if op == Write && !(ingester.State == ACTIVE || ingester.State == PREPARING_TO_LEAVE) {
224+
// If a token is being transferred to a new ingester, we can send writes to that ingester instead
225+
// where they will be queued until the transfer is complete
226+
if nextIngester := r.ringDesc.Ingesters[token.NextIngester]; nextIngester != nil && nextIngester.State == JOINING {
225227
ingester = nextIngester
226228
} else {
227229
n++
228230
}
229-
} else if op == Read && (ingester.State != ACTIVE && ingester.State != LEAVING) {
231+
} else if op == Read && !(ingester.State == ACTIVE || ingester.State == PREPARING_TO_LEAVE || ingester.State == LEAVING) {
230232
n++
231233
}
232234

pkg/ring/ring.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,5 @@ enum IngesterState {
2727

2828
PENDING = 2;
2929
JOINING = 3;
30+
PREPARING_TO_LEAVE = 4;
3031
}

0 commit comments

Comments
 (0)