Skip to content

Commit 74b51d8

Browse files
committed
Send default tasks to unversioned queue when user data disabled (#4610)
1 parent d4cd737 commit 74b51d8

File tree

5 files changed

+96
-28
lines changed

5 files changed

+96
-28
lines changed

service/frontend/workflow_handler.go

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -878,9 +878,10 @@ func (wh *WorkflowHandler) PollWorkflowTaskQueue(ctx context.Context, request *w
878878
return &workflowservice.PollWorkflowTaskQueueResponse{}, nil
879879
}
880880

881-
// These errors are expected based on certain client behavior. We should not log them, it'd be too noisy.
882-
var newerBuild *serviceerror.NewerBuildExists
883-
if errors.As(err, &newerBuild) {
881+
// These errors are expected from some versioning situations. We should not log them, it'd be too noisy.
882+
var newerBuild *serviceerror.NewerBuildExists // expected when versioned poller is superceded
883+
var failedPrecond *serviceerror.FailedPrecondition // expected when user data is disabled
884+
if errors.As(err, &newerBuild) || errors.As(err, &failedPrecond) {
884885
return nil, err
885886
}
886887

@@ -1115,9 +1116,10 @@ func (wh *WorkflowHandler) PollActivityTaskQueue(ctx context.Context, request *w
11151116
return &workflowservice.PollActivityTaskQueueResponse{}, nil
11161117
}
11171118

1118-
// These errors are expected based on certain client behavior. We should not log them, it'd be too noisy.
1119-
var newerBuild *serviceerror.NewerBuildExists
1120-
if errors.As(err, &newerBuild) {
1119+
// These errors are expected from some versioning situations. We should not log them, it'd be too noisy.
1120+
var newerBuild *serviceerror.NewerBuildExists // expected when versioned poller is superceded
1121+
var failedPrecond *serviceerror.FailedPrecondition // expected when user data is disabled
1122+
if errors.As(err, &newerBuild) || errors.As(err, &failedPrecond) {
11211123
return nil, err
11221124
}
11231125

service/matching/matchingEngine.go

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -458,12 +458,7 @@ func (e *matchingEngineImpl) DispatchSpooledTask(
458458
taskQueue, userDataChanged, err := e.redirectToVersionedQueueForAdd(
459459
ctx, unversionedOrigTaskQueue, directive, stickyInfo)
460460
if err != nil {
461-
// Return error for tasks with compatiblity constraints when user data is disabled so they can be retried later.
462-
// "default" directive tasks become unversioned.
463-
if !errors.Is(err, errUserDataDisabled) || directive.GetBuildId() != "" {
464-
return err
465-
}
466-
err = nil
461+
return err
467462
}
468463
sticky := stickyInfo.kind == enumspb.TASK_QUEUE_KIND_STICKY
469464
tqm, err := e.getTaskQueueManager(ctx, taskQueue, stickyInfo, !sticky)
@@ -957,9 +952,6 @@ func (e *matchingEngineImpl) GetWorkerBuildIdCompatibility(
957952
}
958953
userData, _, err := tqMgr.GetUserData(ctx)
959954
if err != nil {
960-
if _, ok := err.(*serviceerror.NotFound); ok {
961-
return &matchingservice.GetWorkerBuildIdCompatibilityResponse{}, nil
962-
}
963955
return nil, err
964956
}
965957
return &matchingservice.GetWorkerBuildIdCompatibilityResponse{
@@ -1503,7 +1495,11 @@ func (e *matchingEngineImpl) redirectToVersionedQueueForAdd(
15031495
// Have to look up versioning data.
15041496
userData, userDataChanged, err := baseTqm.GetUserData(ctx)
15051497
if err != nil {
1506-
return taskQueue, userDataChanged, err
1498+
if errors.Is(err, errUserDataDisabled) && buildId == "" {
1499+
// When user data disabled, send "default" tasks to unversioned queue.
1500+
return taskQueue, userDataChanged, nil
1501+
}
1502+
return nil, nil, err
15071503
}
15081504
data := userData.GetData().GetVersioningData()
15091505

service/matching/matchingEngine_test.go

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,31 @@ func (s *matchingEngineSuite) TestPollActivityTaskQueues_NamespaceHandover() {
547547
s.Equal(common.ErrNamespaceHandover.Error(), err.Error())
548548
}
549549

550+
func (s *matchingEngineSuite) TestPollWorkflowTask_UserDataDisabled() {
551+
s.matchingEngine.config.LoadUserData = dynamicconfig.GetBoolPropertyFnFilteredByTaskQueueInfo(false)
552+
taskQueue := s.T().Name()
553+
554+
resp, err := s.matchingEngine.PollWorkflowTaskQueue(context.Background(), &matchingservice.PollWorkflowTaskQueueRequest{
555+
NamespaceId: "asdf",
556+
PollRequest: &workflowservice.PollWorkflowTaskQueueRequest{
557+
Namespace: "asdf",
558+
TaskQueue: &taskqueuepb.TaskQueue{
559+
Name: taskQueue,
560+
Kind: enumspb.TASK_QUEUE_KIND_NORMAL,
561+
},
562+
Identity: "identity",
563+
WorkerVersionCapabilities: &commonpb.WorkerVersionCapabilities{
564+
BuildId: "some_build_id",
565+
UseVersioning: true,
566+
},
567+
},
568+
}, metrics.NoopMetricsHandler)
569+
s.Error(err)
570+
s.Nil(resp)
571+
var failedPrecondition *serviceerror.FailedPrecondition
572+
s.ErrorAs(err, &failedPrecondition)
573+
}
574+
550575
func (s *matchingEngineSuite) TestAddActivityTasks() {
551576
s.AddTasksTest(enumspb.TASK_QUEUE_TYPE_ACTIVITY, false)
552577
}

service/matching/taskQueueManager_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ func TestDeliverBufferTasks_NoPollers(t *testing.T) {
119119
tlm.taskReader.gorogrp.Wait()
120120
}
121121

122-
func TestDeliverBufferTasks_RetriesVersionedTaskWhenUserInfoDisabled(t *testing.T) {
122+
func TestDeliverBufferTasks_DisableUserData_SendsVersionedToUnversioned(t *testing.T) {
123123
t.Parallel()
124124

125125
controller := gomock.NewController(t)
@@ -154,7 +154,7 @@ func TestDeliverBufferTasks_RetriesVersionedTaskWhenUserInfoDisabled(t *testing.
154154
tlm.taskReader.gorogrp.Wait()
155155
}
156156

157-
func TestDeliverBufferTasks_RetriesUseDefaultTaskWhenUserInfoDisabled(t *testing.T) {
157+
func TestDeliverBufferTasks_DisableUserData_SendsDefaultToUnversioned(t *testing.T) {
158158
t.Parallel()
159159

160160
controller := gomock.NewController(t)

tests/versioning_test.go

Lines changed: 55 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ func (s *versioningIntegSuite) dispatchNewWorkflowStartWorkerFirst() {
397397
s.Equal("done!", out)
398398
}
399399

400-
func (s *versioningIntegSuite) TestDisableLoadUserDataDefaultTasksBecomeUnversioned() {
400+
func (s *versioningIntegSuite) TestDisableUserData_DefaultTasksBecomeUnversioned() {
401401
dc := s.testCluster.host.dcClient
402402
dc.OverrideValue(dynamicconfig.MatchingNumTaskqueueReadPartitions, 1)
403403
defer dc.RemoveOverride(dynamicconfig.MatchingNumTaskqueueReadPartitions)
@@ -1471,7 +1471,7 @@ func (s *versioningIntegSuite) dispatchCron() {
14711471
s.GreaterOrEqual(runs2.Load(), int32(3))
14721472
}
14731473

1474-
func (s *versioningIntegSuite) TestDisableLoadUserData() {
1474+
func (s *versioningIntegSuite) TestDisableUserData() {
14751475
tq := s.T().Name()
14761476
v1 := s.prefixed("v1")
14771477
v2 := s.prefixed("v2")
@@ -1482,6 +1482,9 @@ func (s *versioningIntegSuite) TestDisableLoadUserData() {
14821482
// First insert some data (we'll try to read it below)
14831483
s.addNewDefaultBuildId(ctx, tq, v1)
14841484

1485+
// unload so that we reload and pick up LoadUserData dynamic config
1486+
s.unloadTaskQueue(ctx, tq)
1487+
14851488
dc := s.testCluster.host.dcClient
14861489
defer dc.RemoveOverride(dynamicconfig.MatchingLoadUserData)
14871490
dc.OverrideValue(dynamicconfig.MatchingLoadUserData, false)
@@ -1509,7 +1512,34 @@ func (s *versioningIntegSuite) TestDisableLoadUserData() {
15091512
s.Require().ErrorAs(err, &failedPreconditionError)
15101513
}
15111514

1512-
func (s *versioningIntegSuite) TestWorkflowGetsStuckWhenDisablingLoadingUserData() {
1515+
func (s *versioningIntegSuite) TestDisableUserData_UnversionedWorkflowRuns() {
1516+
tq := s.T().Name()
1517+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1518+
defer cancel()
1519+
1520+
dc := s.testCluster.host.dcClient
1521+
defer dc.RemoveOverride(dynamicconfig.MatchingLoadUserData)
1522+
dc.OverrideValue(dynamicconfig.MatchingLoadUserData, false)
1523+
1524+
wf := func(ctx workflow.Context) (string, error) {
1525+
return "ok", nil
1526+
}
1527+
wrk := worker.New(s.sdkClient, tq, worker.Options{})
1528+
wrk.RegisterWorkflowWithOptions(wf, workflow.RegisterOptions{Name: "wf"})
1529+
s.NoError(wrk.Start())
1530+
defer wrk.Stop()
1531+
1532+
run, err := s.sdkClient.ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
1533+
TaskQueue: tq,
1534+
WorkflowExecutionTimeout: 5 * time.Second,
1535+
}, "wf")
1536+
s.NoError(err)
1537+
var out string
1538+
s.NoError(run.Get(ctx, &out))
1539+
s.Equal("ok", out)
1540+
}
1541+
1542+
func (s *versioningIntegSuite) TestDisableUserData_WorkflowGetsStuck() {
15131543
tq := s.T().Name()
15141544
v1 := s.prefixed("v1")
15151545
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
@@ -1528,7 +1558,7 @@ func (s *versioningIntegSuite) TestWorkflowGetsStuckWhenDisablingLoadingUserData
15281558
return nil
15291559
}
15301560
wrk := worker.New(s.sdkClient, tq, worker.Options{
1531-
BuildID: "v1",
1561+
BuildID: v1,
15321562
UseBuildIDForVersioning: true,
15331563
MaxConcurrentWorkflowTaskPollers: numPollers,
15341564
})
@@ -1538,16 +1568,31 @@ func (s *versioningIntegSuite) TestWorkflowGetsStuckWhenDisablingLoadingUserData
15381568

15391569
run, err := s.sdkClient.ExecuteWorkflow(ctx, sdkclient.StartWorkflowOptions{
15401570
TaskQueue: tq,
1541-
WorkflowExecutionTimeout: 5 * time.Second,
1571+
WorkflowExecutionTimeout: 10 * time.Second,
15421572
}, "wf")
15431573
s.Require().NoError(err)
1544-
err = run.Get(ctx, nil)
1545-
var timeoutError *temporal.TimeoutError
1546-
s.Require().ErrorAs(err, &timeoutError)
1574+
1575+
// should not run on versioned worker
1576+
time.Sleep(2 * time.Second)
15471577
s.Require().Equal(int32(0), runs.Load())
1578+
1579+
wrk.Stop()
1580+
1581+
// start unversioned worker and let task run there
1582+
wrk2 := worker.New(s.sdkClient, tq, worker.Options{
1583+
MaxConcurrentWorkflowTaskPollers: numPollers,
1584+
})
1585+
wrk2.RegisterWorkflowWithOptions(wf, workflow.RegisterOptions{Name: "wf"})
1586+
s.NoError(wrk2.Start())
1587+
defer wrk2.Stop()
1588+
1589+
// now workflow can complete
1590+
err = run.Get(ctx, nil)
1591+
s.NoError(err)
1592+
s.Require().Equal(int32(1), runs.Load())
15481593
}
15491594

1550-
func (s *versioningIntegSuite) TestWorkflowQueryTimesOutWhenDisablingLoadingUserData() {
1595+
func (s *versioningIntegSuite) TestDisableUserData_QueryTimesOut() {
15511596
tq := s.T().Name()
15521597
v1 := s.prefixed("v1")
15531598
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
@@ -1562,7 +1607,7 @@ func (s *versioningIntegSuite) TestWorkflowQueryTimesOutWhenDisablingLoadingUser
15621607
})
15631608
}
15641609
wrk := worker.New(s.sdkClient, tq, worker.Options{
1565-
BuildID: "v1",
1610+
BuildID: v1,
15661611
UseBuildIDForVersioning: true,
15671612
MaxConcurrentWorkflowTaskPollers: numPollers,
15681613
})

0 commit comments

Comments
 (0)