@@ -94,9 +94,6 @@ type Server struct {
 	// strong consistency.
 	fsm *consulFSM
 
-	// left is true if we have attempted to leave the cluster.
-	left bool
-
 	// localConsuls is used to track the known consuls
 	// in the local datacenter. Used to do leader forwarding.
 	localConsuls map[raft.ServerAddress]*agent.Server
@@ -106,17 +103,13 @@ type Server struct {
 	logger *log.Logger
 
 	// The raft instance is used among Consul nodes within the DC to protect
-	// operations that require strong consistency. The raftSafeFn will get
-	// called on a graceful leave to help "safe" the state of the server so
-	// it won't interfere with other servers. This will be called after Raft
-	// is shutdown, but before the state store is closed, so it can manipulate
+	// operations that require strong consistency.
 	// the state directly.
 	raft          *raft.Raft
 	raftLayer     *RaftLayer
 	raftStore     *raftboltdb.BoltStore
 	raftTransport *raft.NetworkTransport
 	raftInmem     *raft.InmemStore
-	raftSafeFn    func() error
 
 	// reconcileCh is used to pass events from the serf handler
 	// into the leader manager, so that the strong state can be
@@ -466,31 +459,6 @@ Please see https://www.consul.io/docs/guides/outage.html for more information.
 			}
 			s.logger.Printf("[INFO] consul: deleted peers.json file after successful recovery")
 		}
-
-		// Register a cleanup function to call when a leave occurs. This
-		// needs state that we only have access to here. This does a
-		// recover operation to an empty configuration so this server
-		// won't interfere with the rest of the cluster.
-		s.raftSafeFn = func() error {
-			hasState, err := raft.HasExistingState(log, stable, snap)
-			if err != nil {
-				return fmt.Errorf("cleanup failed to check for state: %v", err)
-			}
-			if !hasState {
-				return nil
-			}
-
-			tmpFsm, err := NewFSM(s.tombstoneGC, s.config.LogOutput)
-			if err != nil {
-				return fmt.Errorf("cleanup failed to make temp FSM: %v", err)
-			}
-			if err := raft.RecoverCluster(s.config.RaftConfig, tmpFsm,
-				log, stable, snap, trans, raft.Configuration{}); err != nil {
-				return fmt.Errorf("recovery failed: %v", err)
-			}
-
-			return nil
-		}
 	}
 
 	// If we are in bootstrap or dev mode and the state is clean then we can
@@ -614,11 +582,6 @@ func (s *Server) Shutdown() error {
 		if err := future.Error(); err != nil {
 			s.logger.Printf("[WARN] consul: error shutting down raft: %s", err)
 		}
-		if s.left && s.raftSafeFn != nil {
-			if err := s.raftSafeFn(); err != nil {
-				s.logger.Printf("[WARN] consul: error safing raft: %s", err)
-			}
-		}
 		if s.raftStore != nil {
 			s.raftStore.Close()
 		}
@@ -637,7 +600,6 @@ func (s *Server) Shutdown() error {
 // Leave is used to prepare for a graceful shutdown of the server
 func (s *Server) Leave() error {
 	s.logger.Printf("[INFO] consul: server starting leave")
-	s.left = true
 
 	// Check the number of known peers
 	numPeers, err := s.numPeers()
@@ -703,6 +665,17 @@ func (s *Server) Leave() error {
 			}
 		}
 
+		// TODO (slackpad) With the old Raft library we used to force the
+		// peers set to empty when a graceful leave occurred. This would
+		// keep voting spam down if the server was restarted, but it was
+		// dangerous because the peers was inconsistent with the logs and
+		// snapshots, so it wasn't really safe in all cases for the server
+		// to become leader. This is now safe, but the log spam is noisy.
+		// The next new version of the library will have a "you are not a
+		// peer stop it" behavior that should address this. We will have
+		// to evaluate during the RC period if this interim situation is
+		// not too confusing for operators.
+
 		// TODO (slackpad) When we take a later new version of the Raft
 		// library it won't try to complete replication, so this peer
 		// may not realize that it has been removed. Need to revisit this
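For reference, below is a minimal, self-contained sketch of the mechanism the removed raftSafeFn hook implemented: recovering the local Raft state to an empty configuration so a departed server cannot interfere with the rest of the cluster. It uses the same hashicorp/raft APIs seen in the deleted code (HasExistingState, RecoverCluster); the package name, function name, and parameter wiring are illustrative assumptions, not part of this commit.

package cleanup

import (
	"fmt"

	"github.com/hashicorp/raft"
)

// recoverToEmpty mirrors the removed raftSafeFn hook: if this node has
// existing Raft state, rewrite its configuration to an empty membership so a
// restarted server cannot disturb the rest of the cluster. The stores and
// transport are assumed to be built by the caller (hypothetical wiring).
func recoverToEmpty(conf *raft.Config, fsm raft.FSM, logs raft.LogStore,
	stable raft.StableStore, snaps raft.SnapshotStore, trans raft.Transport) error {
	hasState, err := raft.HasExistingState(logs, stable, snaps)
	if err != nil {
		return fmt.Errorf("cleanup failed to check for state: %v", err)
	}
	if !hasState {
		// Nothing to do on a node with no Raft state.
		return nil
	}

	// Recovering with an empty Configuration removes every server, including
	// this one, from the peer set recorded in this node's logs and snapshots.
	if err := raft.RecoverCluster(conf, fsm, logs, stable, snaps, trans,
		raft.Configuration{}); err != nil {
		return fmt.Errorf("recovery failed: %v", err)
	}
	return nil
}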