@@ -39,7 +39,8 @@ import (
3939)
4040
4141const (
42- servicePathFormat = "/ms/%d/router/registry/" // "/ms/{cluster_id}/router/registry/"
42+ // "/ms/{cluster_id}/router/registry/"
43+ servicePathFormat = "/ms/%d/router/registry/"
4344)
4445
4546var _ ServiceDiscovery = (* routerServiceDiscovery )(nil )
@@ -64,9 +65,10 @@ type routerServiceDiscovery struct {
6465
6566 checkMembershipCh chan struct {}
6667
67- ctx context.Context
68- cancel context.CancelFunc
69- wg sync.WaitGroup
68+ parentCtx context.Context
69+ ctx context.Context
70+ cancel context.CancelFunc
71+ wg sync.WaitGroup
7072
7173 // Client option.
7274 option * opt.Option
@@ -108,6 +110,8 @@ func (r *routerServiceDiscovery) GetOrCreateGRPCConn(url string) (*grpc.ClientCo
108110// ScheduleCheckMemberChanged schedules a check for member changes.
109111func (r * routerServiceDiscovery ) ScheduleCheckMemberChanged () {
110112 select {
113+ case <- r .parentCtx .Done ():
114+ log .Info ("[router service] service discovery is shutting down" )
111115 case r .checkMembershipCh <- struct {}{}:
112116 default :
113117 }
@@ -132,12 +136,10 @@ func NewRouterServiceDiscovery(
132136 ctx context.Context , metaCli metastorage.Client , serviceDiscovery ServiceDiscovery ,
133137 tlsCfg * tls.Config , option * opt.Option ,
134138) ServiceDiscovery {
135- ctx , cancel := context .WithCancel (ctx )
136139 balancer := newServiceBalancer (emptyErrorFn )
137140 c := & routerServiceDiscovery {
138- ctx : ctx ,
141+ parentCtx : ctx ,
139142 ServiceDiscovery : serviceDiscovery ,
140- cancel : cancel ,
141143 metaCli : metaCli ,
142144 tlsCfg : tlsCfg ,
143145 option : option ,
@@ -149,7 +151,7 @@ func NewRouterServiceDiscovery(
149151 // will be discovered later.
150152 c .defaultDiscoveryKey = fmt .Sprintf (servicePathFormat , c .GetClusterID ())
151153
152- log .Info ("created router service discovery" ,
154+ log .Info ("[router service] created router service discovery" ,
153155 zap .Uint64 ("cluster-id" , c .GetClusterID ()),
154156 zap .Uint32 ("keyspace-id" , c .GetKeyspaceID ()),
155157 zap .String ("default-discovery-key" , c .defaultDiscoveryKey ))
@@ -158,13 +160,13 @@ func NewRouterServiceDiscovery(
158160
159161// Init initializes the underlying concrete client.
160162func (r * routerServiceDiscovery ) Init () error {
161- log .Info ("initializing router service discovery" ,
163+ log .Info ("[router service] initializing router service discovery" ,
162164 zap .Int ("max-retry-times" , r .option .MaxRetryTimes ),
163165 zap .Duration ("retry-interval" , initRetryInterval ))
166+ r .ctx , r .cancel = context .WithCancel (r .parentCtx )
164167 if err := r .CheckMemberChanged (); err != nil {
165- r .cancel ()
166- log .Warn ("failed to initialize router service discovery" , zap .Error (err ))
167- return err
168+ // Initial check failed, log and continue to run the background loop.
169+ log .Warn ("[router service] failed to initialize router service discovery" , zap .Error (err ))
168170 }
169171 r .wg .Add (2 )
170172 go r .startCheckMemberLoop ()
@@ -203,19 +205,21 @@ func (r *routerServiceDiscovery) updateNodes(urls []string) {
203205 if client .(* serviceClient ).GetClientConn () == nil {
204206 conn , err := r .GetOrCreateGRPCConn (url )
205207 if err != nil || conn == nil {
206- log .Warn ("[pd] failed to connect follower" , zap .String ("follower" , url ), errs .ZapError (err ))
208+ log .Warn ("[router service] failed to connect router service" ,
209+ zap .String ("new-url" , newURL ), errs .ZapError (err ))
207210 continue
208211 }
209212 node := newPDServiceClient (url , r .GetServingURL (), conn , false )
210213 r .nodes .Store (url , node )
211214 }
212215 } else {
213216 conn , err := r .GetOrCreateGRPCConn (url )
214- follower := newPDServiceClient (url , r .GetServingURL (), conn , false )
215217 if err != nil || conn == nil {
216- log .Warn ("[pd] failed to connect follower" , zap .String ("follower" , url ), errs .ZapError (err ))
218+ log .Warn ("[router service] failed to connect follower" ,
219+ zap .String ("url" , url ), errs .ZapError (err ))
217220 }
218- r .nodes .LoadOrStore (url , follower )
221+ nodeClient := newPDServiceClient (url , r .GetServingURL (), conn , false )
222+ r .nodes .LoadOrStore (url , nodeClient )
219223 }
220224 }
221225 }
@@ -224,6 +228,9 @@ func (r *routerServiceDiscovery) updateNodes(urls []string) {
224228 clients = append (clients , value .(* serviceClient ))
225229 return true
226230 })
231+ log .Info ("[router service] updating nodes succeeded" ,
232+ zap .Strings ("urls" , urls ),
233+ zap .Int ("clients-length" , len (clients )))
227234 r .balancer .set (clients )
228235}
229236
@@ -261,7 +268,7 @@ func (r *routerServiceDiscovery) startCheckMemberLoop() {
261268 // so that we can speed up the process of router service discovery when failover happens on the
262269 // router service side and also ensures it won't call updateMember too frequently during normal time.
263270 if err := r .CheckMemberChanged (); err != nil {
264- log .Error ("[router service] failed to update member" , errs .ZapError (err ))
271+ log .Warn ("[router service] failed to update member" , errs .ZapError (err ))
265272 }
266273 }
267274}
@@ -287,19 +294,23 @@ func innerRetry(
287294
288295// Close releases all resources
289296func (r * routerServiceDiscovery ) Close () {
290- log .Info ("closing router service discovery" )
291- r .cancel ()
292- r .wg .Wait ()
297+ log .Info ("[router service] closing router service discovery" )
298+ if r .cancel != nil {
299+ r .cancel ()
300+ }
293301
294302 r .clientConns .Range (func (key , cc any ) bool {
295303 if err := cc .(* grpc.ClientConn ).Close (); err != nil {
296- log .Error ("[router service] failed to close gRPC clientConn" , errs .ZapError (errs .ErrCloseGRPCConn , err ))
304+ log .Warn ("[router service] failed to close gRPC clientConn" , errs .ZapError (errs .ErrCloseGRPCConn , err ))
297305 }
298306 r .clientConns .Delete (key )
299307 return true
300308 })
301-
302- log .Info ("router service discovery is closed" )
309+ r .sortedUrls .Store ([]string {})
310+ r .balancer .clean ()
311+ r .nodes .Clear ()
312+ r .wg .Wait ()
313+ log .Info ("[router service] is closed" )
303314}
304315
305316// getMSMembers returns all the members of the specified service name.
@@ -315,7 +326,8 @@ func getMSMembers(ctx context.Context, serviceKey string, client metastorage.Cli
315326 for _ , kv := range resp .GetKvs () {
316327 var entry ServiceRegistryEntry
317328 if err = entry .Deserialize (kv .Value ); err != nil {
318- log .Error ("try to deserialize service registry entry failed" , zap .String ("key" , string (kv .Key )), zap .Error (err ))
329+ log .Warn ("[router service] try to deserialize service registry entry failed" ,
330+ zap .String ("key" , string (kv .Key )), zap .Error (err ))
319331 continue
320332 }
321333 ret = append (ret , entry .ServiceAddr )
@@ -335,20 +347,10 @@ type ServiceRegistryEntry struct {
335347 StartTimestamp int64 `json:"start-timestamp"`
336348}
337349
338- // Serialize this service registry entry
339- func (e * ServiceRegistryEntry ) Serialize () (serializedValue string , err error ) {
340- data , err := json .Marshal (e )
341- if err != nil {
342- log .Error ("json marshal the service registry entry failed" , zap .Error (err ))
343- return "" , err
344- }
345- return string (data ), nil
346- }
347-
348350// Deserialize unmarshals the JSON-encoded data into this service registry entry.
349351func (e * ServiceRegistryEntry ) Deserialize (data []byte ) error {
350352 if err := json .Unmarshal (data , e ); err != nil {
351- log .Error ( " json unmarshal the service registry entry failed" , zap .Error (err ))
353+ log .Warn ( "[router service] json unmarshal the service registry entry failed" , zap .Error (err ))
352354 return err
353355 }
354356 return nil
@@ -366,6 +368,7 @@ func (r *routerServiceDiscovery) nodeHealthCheckLoop() {
366368 for {
367369 select {
368370 case <- r .ctx .Done ():
371+ log .Info ("[router service] exit health check member loop" )
369372 return
370373 case <- ticker .C :
371374 r .checkNodeHealth (nodeCheckLoopCtx )
0 commit comments