Skip to content

Commit 2df56d8

Browse files
authored
Fix empty primary may have dirty slots data due to bad migration (valkey-io#1285)
If we become an empty primary for some reason, we still need to check if we need to delete dirty slots, because we may have dirty slots data left over from a bad migration. Like the target node forcibly executes CLUSTER SETSLOT NODE to take over the slot without performing key migration. Signed-off-by: Binbin <[email protected]>
1 parent a2d22c6 commit 2df56d8

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

src/cluster_legacy.c

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2451,6 +2451,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
24512451
* need to delete all the keys in the slots we lost ownership. */
24522452
uint16_t dirty_slots[CLUSTER_SLOTS];
24532453
int dirty_slots_count = 0;
2454+
int delete_dirty_slots = 0;
24542455

24552456
/* We should detect if sender is new primary of our shard.
24562457
* We will know it if all our slots were migrated to sender, and sender
@@ -2677,6 +2678,12 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
26772678
serverLog(LL_NOTICE,
26782679
"My last slot was migrated to node %.40s (%s) in shard %.40s. I am now an empty primary.",
26792680
sender->name, sender->human_nodename, sender->shard_id);
2681+
/* We may still have dirty slots when we became a empty primary due to
2682+
* a bad migration.
2683+
*
2684+
* In order to maintain a consistent state between keys and slots
2685+
* we need to remove all the keys from the slots we lost. */
2686+
delete_dirty_slots = 1;
26802687
}
26812688
} else if (dirty_slots_count) {
26822689
/* If we are here, we received an update message which removed
@@ -2686,6 +2693,10 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
26862693
*
26872694
* In order to maintain a consistent state between keys and slots
26882695
* we need to remove all the keys from the slots we lost. */
2696+
delete_dirty_slots = 1;
2697+
}
2698+
2699+
if (delete_dirty_slots) {
26892700
for (int j = 0; j < dirty_slots_count; j++) {
26902701
serverLog(LL_NOTICE, "Deleting keys in dirty slot %d on node %.40s (%s) in shard %.40s", dirty_slots[j],
26912702
myself->name, myself->human_nodename, myself->shard_id);
@@ -6069,7 +6080,7 @@ void removeChannelsInSlot(unsigned int slot) {
60696080
/* Remove all the keys in the specified hash slot.
60706081
* The number of removed items is returned. */
60716082
unsigned int delKeysInSlot(unsigned int hashslot) {
6072-
if (!kvstoreDictSize(server.db->keys, hashslot)) return 0;
6083+
if (!countKeysInSlot(hashslot)) return 0;
60736084

60746085
unsigned int j = 0;
60756086

tests/unit/cluster/replica-migration.tcl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,3 +400,23 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout
400400
start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} {
401401
test_cluster_setslot "setslot"
402402
} my_slot_allocation cluster_allocate_replicas ;# start_cluster
403+
404+
start_cluster 3 0 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} {
405+
test "Empty primary will check and delete the dirty slots" {
406+
R 2 config set cluster-allow-replica-migration no
407+
408+
# Write a key to slot 0.
409+
R 2 incr key_977613
410+
411+
# Move slot 0 from primary 2 to primary 0.
412+
R 0 cluster bumpepoch
413+
R 0 cluster setslot 0 node [R 0 cluster myid]
414+
415+
# Wait for R 2 to report that it is an empty primary (cluster-allow-replica-migration no)
416+
wait_for_log_messages -2 {"*I am now an empty primary*"} 0 1000 50
417+
418+
# Make sure primary 0 will delete the dirty slots.
419+
verify_log_message -2 "*Deleting keys in dirty slot 0*" 0
420+
assert_equal [R 2 dbsize] 0
421+
}
422+
} my_slot_allocation cluster_allocate_replicas ;# start_cluster

0 commit comments

Comments
 (0)