Skip to content

Commit a274580

Browse files
authored
Fix record child workflow complete mutable state stale check (#2673)
1 parent 3af96c6 commit a274580

File tree

3 files changed

+52
-22
lines changed

3 files changed

+52
-22
lines changed

service/history/historyEngine.go

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,7 +1190,8 @@ func (e *historyEngineImpl) ResetStickyTaskQueue(
11901190
ctx,
11911191
namespaceID,
11921192
*resetRequest.Execution,
1193-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1193+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1194+
mutableState := workflowContext.getMutableState()
11941195
if !mutableState.IsWorkflowExecutionRunning() {
11951196
return nil, consts.ErrWorkflowCompleted
11961197
}
@@ -1385,7 +1386,8 @@ func (e *historyEngineImpl) RecordActivityTaskStarted(
13851386
ctx,
13861387
namespaceID,
13871388
execution,
1388-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1389+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1390+
mutableState := workflowContext.getMutableState()
13891391
if !mutableState.IsWorkflowExecutionRunning() {
13901392
return nil, consts.ErrWorkflowCompleted
13911393
}
@@ -1535,7 +1537,8 @@ func (e *historyEngineImpl) RespondActivityTaskCompleted(
15351537
ctx,
15361538
namespaceID,
15371539
workflowExecution,
1538-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1540+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1541+
mutableState := workflowContext.getMutableState()
15391542
workflowTypeName = mutableState.GetWorkflowType().GetName()
15401543
if !mutableState.IsWorkflowExecutionRunning() {
15411544
return nil, consts.ErrWorkflowCompleted
@@ -1614,7 +1617,8 @@ func (e *historyEngineImpl) RespondActivityTaskFailed(
16141617
var taskQueue string
16151618
var workflowTypeName string
16161619
err = e.updateWorkflowExecution(ctx, namespaceID, workflowExecution,
1617-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1620+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1621+
mutableState := workflowContext.getMutableState()
16181622
workflowTypeName = mutableState.GetWorkflowType().GetName()
16191623
if !mutableState.IsWorkflowExecutionRunning() {
16201624
return nil, consts.ErrWorkflowCompleted
@@ -1716,7 +1720,8 @@ func (e *historyEngineImpl) RespondActivityTaskCanceled(
17161720
ctx,
17171721
namespaceID,
17181722
workflowExecution,
1719-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1723+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1724+
mutableState := workflowContext.getMutableState()
17201725
workflowTypeName = mutableState.GetWorkflowType().GetName()
17211726
if !mutableState.IsWorkflowExecutionRunning() {
17221727
return nil, consts.ErrWorkflowCompleted
@@ -1810,7 +1815,8 @@ func (e *historyEngineImpl) RecordActivityTaskHeartbeat(
18101815
ctx,
18111816
namespaceID,
18121817
workflowExecution,
1813-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1818+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1819+
mutableState := workflowContext.getMutableState()
18141820
if !mutableState.IsWorkflowExecutionRunning() {
18151821
e.logger.Debug("Heartbeat failed")
18161822
return nil, consts.ErrWorkflowCompleted
@@ -1883,7 +1889,8 @@ func (e *historyEngineImpl) RequestCancelWorkflowExecution(
18831889
}
18841890

18851891
return e.updateWorkflow(ctx, namespaceID, execution,
1886-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1892+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1893+
mutableState := workflowContext.getMutableState()
18871894
if !mutableState.IsWorkflowExecutionRunning() {
18881895
// the request to cancel this workflow is a success even
18891896
// if the target workflow has already finished
@@ -1951,7 +1958,8 @@ func (e *historyEngineImpl) SignalWorkflowExecution(
19511958
ctx,
19521959
namespaceID,
19531960
execution,
1954-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
1961+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
1962+
mutableState := workflowContext.getMutableState()
19551963
if request.GetRequestId() != "" && mutableState.IsSignalRequested(request.GetRequestId()) {
19561964
return &updateWorkflowAction{
19571965
noop: true,
@@ -2259,7 +2267,8 @@ func (e *historyEngineImpl) RemoveSignalMutableState(
22592267
ctx,
22602268
namespaceID,
22612269
execution,
2262-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
2270+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
2271+
mutableState := workflowContext.getMutableState()
22632272
if !mutableState.IsWorkflowExecutionRunning() {
22642273
return nil, consts.ErrWorkflowCompleted
22652274
}
@@ -2299,7 +2308,8 @@ func (e *historyEngineImpl) TerminateWorkflowExecution(
22992308
ctx,
23002309
namespaceID,
23012310
execution,
2302-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
2311+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
2312+
mutableState := workflowContext.getMutableState()
23032313
if !mutableState.IsWorkflowExecutionRunning() {
23042314
return nil, consts.ErrWorkflowCompleted
23052315
}
@@ -2375,7 +2385,8 @@ func (e *historyEngineImpl) RecordChildExecutionCompleted(
23752385
ctx,
23762386
namespaceID,
23772387
execution,
2378-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
2388+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
2389+
mutableState := workflowContext.getMutableState()
23792390
if !mutableState.IsWorkflowExecutionRunning() {
23802391
return nil, consts.ErrWorkflowCompleted
23812392
}
@@ -2387,7 +2398,22 @@ func (e *historyEngineImpl) RecordChildExecutionCompleted(
23872398
// Check mutable state to make sure child execution is in pending child executions
23882399
ci, isRunning := mutableState.GetChildExecutionInfo(initiatedID)
23892400
if !isRunning && initiatedID >= mutableState.GetNextEventID() {
2390-
return nil, consts.ErrStaleState
2401+
// mutable state is possibly stale, try reloading it
2402+
//
2403+
// TODO: use initiate event ID and version to verify if the child exists or not
2404+
//
2405+
// NOTE: do not return ErrStaleState here, as in xdc there's no guarantee that parent
2406+
// will have the child information and its next eventID will be larger than the initiatedID
2407+
// in the request after forced failover.
2408+
// If ErrStaleState is returned, the logic for this handler and processing of CloseWorkflowExecution
2409+
// task will keep retrying infinitely.
2410+
workflowContext.getContext().Clear()
2411+
mutableState, err = workflowContext.reloadMutableState(ctx)
2412+
if err != nil {
2413+
return nil, err
2414+
}
2415+
2416+
ci, isRunning = mutableState.GetChildExecutionInfo(initiatedID)
23912417
}
23922418
if !isRunning || ci.StartedId == common.EmptyEventID {
23932419
return nil, serviceerror.NewNotFound("Pending child execution not found.")
@@ -2663,11 +2689,8 @@ func (e *historyEngineImpl) updateWorkflowWithNewHelper(
26632689

26642690
UpdateHistoryLoop:
26652691
for attempt := 1; attempt <= conditionalRetryCount; attempt++ {
2666-
weContext := workflowContext.getContext()
2667-
mutableState := workflowContext.getMutableState()
2668-
26692692
// conduct caller action
2670-
postActions, err := action(weContext, mutableState)
2693+
postActions, err := action(workflowContext)
26712694
if err != nil {
26722695
if err == consts.ErrStaleState {
26732696
// Handler detected that cached workflow mutable state could potentially be stale
@@ -2689,6 +2712,7 @@ UpdateHistoryLoop:
26892712
return nil
26902713
}
26912714

2715+
mutableState := workflowContext.getMutableState()
26922716
if postActions.createWorkflowTask {
26932717
// Create a transfer task to schedule a workflow task
26942718
if !mutableState.HasPendingWorkflowTask() {
@@ -3064,7 +3088,8 @@ func (e *historyEngineImpl) applyWorkflowIDReusePolicyHelper(
30643088
case enumsspb.WORKFLOW_EXECUTION_STATE_CREATED,
30653089
enumsspb.WORKFLOW_EXECUTION_STATE_RUNNING:
30663090
if wfIDReusePolicy == enumspb.WORKFLOW_ID_REUSE_POLICY_TERMINATE_IF_RUNNING {
3067-
return func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
3091+
return func(workflowContext workflowContext) (*updateWorkflowAction, error) {
3092+
mutableState := workflowContext.getMutableState()
30683093
if !mutableState.IsWorkflowExecutionRunning() {
30693094
return nil, consts.ErrWorkflowCompleted
30703095
}
@@ -3196,7 +3221,9 @@ func (e *historyEngineImpl) ReapplyEvents(
31963221
ctx,
31973222
namespaceID,
31983223
currentExecution,
3199-
func(context workflow.Context, mutableState workflow.MutableState) (action *updateWorkflowAction, retErr error) {
3224+
func(workflowContext workflowContext) (action *updateWorkflowAction, retErr error) {
3225+
context := workflowContext.getContext()
3226+
mutableState := workflowContext.getMutableState()
32003227
// Filter out reapply event from the same cluster
32013228
toReapplyEvents := make([]*historypb.HistoryEvent, 0, len(reapplyEvents))
32023229
lastWriteVersion, err := mutableState.GetLastWriteVersion()

service/history/workflowExecutionUtil.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ var (
5959
}
6060
)
6161

62-
type updateWorkflowActionFunc func(workflow.Context, workflow.MutableState) (*updateWorkflowAction, error)
62+
type updateWorkflowActionFunc func(workflowContext) (*updateWorkflowAction, error)
6363

6464
func (w *workflowContextImpl) getContext() workflow.Context {
6565
return w.context

service/history/workflowTaskHandlerCallbacks.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ func (handler *workflowTaskHandlerCallbacksImpl) handleWorkflowTaskScheduled(
128128
ctx,
129129
namespaceID,
130130
execution,
131-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
131+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
132+
mutableState := workflowContext.getMutableState()
132133
if !mutableState.IsWorkflowExecutionRunning() {
133134
return nil, consts.ErrWorkflowCompleted
134135
}
@@ -177,7 +178,8 @@ func (handler *workflowTaskHandlerCallbacksImpl) handleWorkflowTaskStarted(
177178
ctx,
178179
namespaceID,
179180
execution,
180-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
181+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
182+
mutableState := workflowContext.getMutableState()
181183
if !mutableState.IsWorkflowExecutionRunning() {
182184
return nil, consts.ErrWorkflowCompleted
183185
}
@@ -281,7 +283,8 @@ func (handler *workflowTaskHandlerCallbacksImpl) handleWorkflowTaskFailed(
281283
ctx,
282284
namespaceID,
283285
workflowExecution,
284-
func(context workflow.Context, mutableState workflow.MutableState) (*updateWorkflowAction, error) {
286+
func(workflowContext workflowContext) (*updateWorkflowAction, error) {
287+
mutableState := workflowContext.getMutableState()
285288
if !mutableState.IsWorkflowExecutionRunning() {
286289
return nil, consts.ErrWorkflowCompleted
287290
}

0 commit comments

Comments
 (0)