Skip to content

Commit 5fb4f1c

Browse files
authored
Rework health check (#2579)
The standard gRPC Health Check Service implementation (`google.golang.org/grpc/health`) is now used instead of the custom implementation in the Workflow Handler. This allows each separate service/handler to announce its own serving state.
1 parent 9d69465 commit 5fb4f1c

12 files changed

+79
-168
lines changed

service/frontend/adminHandler.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,8 @@ import (
3838
"go.temporal.io/api/serviceerror"
3939
workflowpb "go.temporal.io/api/workflow/v1"
4040
sdkclient "go.temporal.io/sdk/client"
41+
"google.golang.org/grpc/health"
42+
healthpb "google.golang.org/grpc/health/grpc_health_v1"
4143

4244
"go.temporal.io/server/api/adminservice/v1"
4345
clusterspb "go.temporal.io/server/api/cluster/v1"
@@ -111,6 +113,7 @@ type (
111113
saProvider searchattribute.Provider
112114
saManager searchattribute.Manager
113115
clusterMetadata cluster.Metadata
116+
healthServer *health.Server
114117
}
115118

116119
NewAdminHandlerArgs struct {
@@ -138,6 +141,7 @@ type (
138141
SaManager searchattribute.Manager
139142
ClusterMetadata cluster.Metadata
140143
ArchivalMetadata archiver.ArchivalMetadata
144+
HealthServer *health.Server
141145
}
142146
)
143147

@@ -196,17 +200,18 @@ func NewAdminHandler(
196200
saProvider: args.SaProvider,
197201
saManager: args.SaManager,
198202
clusterMetadata: args.ClusterMetadata,
203+
healthServer: args.HealthServer,
199204
}
200205
}
201206

202207
// Start starts the handler
203208
func (adh *AdminHandler) Start() {
204-
if !atomic.CompareAndSwapInt32(
209+
if atomic.CompareAndSwapInt32(
205210
&adh.status,
206211
common.DaemonStatusInitialized,
207212
common.DaemonStatusStarted,
208213
) {
209-
return
214+
adh.healthServer.SetServingStatus(AdminServiceName, healthpb.HealthCheckResponse_SERVING)
210215
}
211216

212217
// Start namespace replication queue cleanup
@@ -216,12 +221,12 @@ func (adh *AdminHandler) Start() {
216221

217222
// Stop stops the handler
218223
func (adh *AdminHandler) Stop() {
219-
if !atomic.CompareAndSwapInt32(
224+
if atomic.CompareAndSwapInt32(
220225
&adh.status,
221226
common.DaemonStatusStarted,
222227
common.DaemonStatusStopped,
223228
) {
224-
return
229+
adh.healthServer.SetServingStatus(AdminServiceName, healthpb.HealthCheckResponse_NOT_SERVING)
225230
}
226231

227232
// Calling stop if the queue does not start is ok

service/frontend/adminHandler_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ import (
3030
"fmt"
3131
"testing"
3232

33+
"google.golang.org/grpc/health"
34+
3335
"go.temporal.io/server/api/adminservicemock/v1"
3436
persistencespb "go.temporal.io/server/api/persistence/v1"
3537
"go.temporal.io/server/common/cluster"
@@ -145,6 +147,7 @@ func (s *adminHandlerSuite) SetupTest() {
145147
s.mockResource.GetSearchAttributesManager(),
146148
s.mockMetadata,
147149
s.mockResource.GetArchivalMetadata(),
150+
health.NewServer(),
148151
}
149152
s.handler = NewAdminHandler(args)
150153
s.handler.Start()

service/frontend/dcRedirectionHandler.go

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ import (
2929
"time"
3030

3131
"go.temporal.io/api/workflowservice/v1"
32-
healthpb "google.golang.org/grpc/health/grpc_health_v1"
3332

3433
"go.temporal.io/server/client"
3534
"go.temporal.io/server/common"
@@ -104,21 +103,6 @@ func (handler *DCRedirectionHandlerImpl) GetConfig() *Config {
104103
return handler.frontendHandler.GetConfig()
105104
}
106105

107-
// UpdateHealthStatus sets the health status for this rpc handler.
108-
// This health status will be used within the rpc health check handler
109-
func (handler *DCRedirectionHandlerImpl) UpdateHealthStatus(status HealthStatus) {
110-
handler.frontendHandler.UpdateHealthStatus(status)
111-
}
112-
113-
// Check is for health check
114-
func (handler *DCRedirectionHandlerImpl) Check(ctx context.Context, request *healthpb.HealthCheckRequest) (*healthpb.HealthCheckResponse, error) {
115-
return handler.frontendHandler.Check(ctx, request)
116-
}
117-
118-
func (handler *DCRedirectionHandlerImpl) Watch(request *healthpb.HealthCheckRequest, server healthpb.Health_WatchServer) error {
119-
return handler.frontendHandler.Watch(request, server)
120-
}
121-
122106
// Namespace APIs, namespace APIs does not require redirection
123107

124108
// DeprecateNamespace API call

service/frontend/dcRedirectionHandler_test.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import (
3434
taskqueuepb "go.temporal.io/api/taskqueue/v1"
3535
"go.temporal.io/api/workflowservice/v1"
3636
"go.temporal.io/api/workflowservicemock/v1"
37-
healthpb "google.golang.org/grpc/health/grpc_health_v1"
37+
"google.golang.org/grpc/health"
3838

3939
tokenspb "go.temporal.io/server/api/token/v1"
4040
"go.temporal.io/server/common/cluster"
@@ -125,6 +125,7 @@ func (s *dcRedirectionHandlerSuite) SetupTest() {
125125
s.mockResource.GetSearchAttributesProvider(),
126126
s.mockResource.GetClusterMetadata(),
127127
s.mockResource.GetArchivalMetadata(),
128+
health.NewServer(),
128129
)
129130

130131
s.mockFrontendHandler = workflowservicemock.NewMockWorkflowServiceServer(s.controller)
@@ -902,17 +903,6 @@ func (serverHandler *testServerHandler) Start() {
902903
func (serverHandler *testServerHandler) Stop() {
903904
}
904905

905-
func (serverHandler *testServerHandler) Check(context.Context, *healthpb.HealthCheckRequest) (*healthpb.HealthCheckResponse, error) {
906-
return nil, nil
907-
}
908-
909-
func (serverHandler *testServerHandler) Watch(*healthpb.HealthCheckRequest, healthpb.Health_WatchServer) error {
910-
return nil
911-
}
912-
913-
func (serverHandler *testServerHandler) UpdateHealthStatus(status HealthStatus) {
914-
}
915-
916906
func (serverHandler *testServerHandler) GetConfig() *Config {
917907
return nil
918908
}

service/frontend/fx.go

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
sdkclient "go.temporal.io/sdk/client"
3232
"go.uber.org/fx"
3333
"google.golang.org/grpc"
34+
"google.golang.org/grpc/health"
3435
"google.golang.org/grpc/keepalive"
3536

3637
"go.temporal.io/server/api/historyservice/v1"
@@ -83,8 +84,9 @@ var Module = fx.Options(
8384
fx.Provide(ThrottledLoggerRpsFnProvider),
8485
fx.Provide(PersistenceMaxQpsProvider),
8586
fx.Provide(FEReplicatorNamespaceReplicationQueueProvider),
86-
fx.Provide(HandlerProvider),
8787
fx.Provide(func(so []grpc.ServerOption) *grpc.Server { return grpc.NewServer(so...) }),
88+
fx.Provide(healthServerProvider),
89+
fx.Provide(HandlerProvider),
8890
fx.Provide(AdminHandlerProvider),
8991
fx.Provide(OperatorHandlerProvider),
9092
fx.Provide(NewVersionChecker),
@@ -96,6 +98,7 @@ var Module = fx.Options(
9698
func NewServiceProvider(
9799
serviceConfig *Config,
98100
server *grpc.Server,
101+
healthServer *health.Server,
99102
handler Handler,
100103
adminHandler *AdminHandler,
101104
operatorHandler *OperatorHandlerImpl,
@@ -109,6 +112,7 @@ func NewServiceProvider(
109112
return NewService(
110113
serviceConfig,
111114
server,
115+
healthServer,
112116
handler,
113117
adminHandler,
114118
operatorHandler,
@@ -346,6 +350,10 @@ func ServiceResolverProvider(membershipMonitor membership.Monitor) (membership.S
346350
return membershipMonitor.GetResolver(common.FrontendServiceName)
347351
}
348352

353+
func healthServerProvider() *health.Server {
354+
return health.NewServer()
355+
}
356+
349357
func AdminHandlerProvider(
350358
params *resource.BootstrapParams,
351359
config *Config,
@@ -371,6 +379,7 @@ func AdminHandlerProvider(
371379
saManager searchattribute.Manager,
372380
clusterMetadata cluster.Metadata,
373381
archivalMetadata archiver.ArchivalMetadata,
382+
healthServer *health.Server,
374383
) *AdminHandler {
375384
args := NewAdminHandlerArgs{
376385
params,
@@ -397,6 +406,7 @@ func AdminHandlerProvider(
397406
saManager,
398407
clusterMetadata,
399408
archivalMetadata,
409+
healthServer,
400410
}
401411
return NewAdminHandler(args)
402412
}
@@ -409,6 +419,7 @@ func OperatorHandlerProvider(
409419
metricsClient metrics.Client,
410420
saProvider searchattribute.Provider,
411421
saManager searchattribute.Manager,
422+
healthServer *health.Server,
412423
) *OperatorHandlerImpl {
413424
args := NewOperatorHandlerImplArgs{
414425
esConfig,
@@ -418,6 +429,7 @@ func OperatorHandlerProvider(
418429
metricsClient,
419430
saProvider,
420431
saManager,
432+
healthServer,
421433
}
422434
return NewOperatorHandlerImpl(args)
423435
}
@@ -445,6 +457,7 @@ func HandlerProvider(
445457
saProvider searchattribute.Provider,
446458
clusterMetadata cluster.Metadata,
447459
archivalMetadata archiver.ArchivalMetadata,
460+
healthServer *health.Server,
448461
) Handler {
449462
wfHandler := NewWorkflowHandler(
450463
serviceConfig,
@@ -464,6 +477,7 @@ func HandlerProvider(
464477
saProvider,
465478
clusterMetadata,
466479
archivalMetadata,
480+
healthServer,
467481
)
468482
handler := NewDCRedirectionHandler(wfHandler, params.DCRedirectionPolicy, logger, clientBean, metricsClient, timeSource, namespaceRegistry, clusterMetadata)
469483
return handler

service/frontend/interface.go

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,21 @@ import (
3030
"go.temporal.io/api/operatorservice/v1"
3131
"go.temporal.io/api/workflowservice/v1"
3232

33-
healthpb "google.golang.org/grpc/health/grpc_health_v1"
34-
3533
"go.temporal.io/server/common"
3634
)
3735

36+
const (
37+
WorkflowServiceName = "temporal.api.workflowservice.v1.WorkflowService"
38+
OperatorServiceName = "temporal.api.operatorservice.v1.OperatorService"
39+
AdminServiceName = "temporal.api.adminservice.v1.AdminService"
40+
)
41+
3842
type (
3943
// Handler is interface wrapping frontend workflow handler
4044
Handler interface {
4145
workflowservice.WorkflowServiceServer
4246
common.Daemon
4347

44-
// HealthServer is the health check method for the whole frontend server
45-
healthpb.HealthServer
46-
// UpdateHealthStatus sets the health status for this rpc handler.
47-
// This health status will be used within the rpc health check handler
48-
UpdateHealthStatus(status HealthStatus)
49-
5048
GetConfig() *Config
5149
}
5250

service/frontend/interface_mock.go

Lines changed: 0 additions & 42 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)