-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Use more reliable workflow mutation check #4076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4225,43 +4225,44 @@ func (ms *MutableStateImpl) eventsToReplicationTask( | |
| transactionPolicy TransactionPolicy, | ||
| events []*historypb.HistoryEvent, | ||
| ) error { | ||
|
|
||
| if transactionPolicy == TransactionPolicyPassive || | ||
| !ms.canReplicateEvents() || | ||
| len(events) == 0 { | ||
| switch transactionPolicy { | ||
| case TransactionPolicyActive: | ||
| if ms.generateReplicationTask() { | ||
| return ms.taskGenerator.GenerateHistoryReplicationTasks(events) | ||
| } | ||
| return nil | ||
| case TransactionPolicyPassive: | ||
| return nil | ||
| default: | ||
| panic(fmt.Sprintf("unknown transaction policy: %v", transactionPolicy)) | ||
| } | ||
|
|
||
| currentBranchToken, err := ms.GetCurrentBranchToken() | ||
| if err != nil { | ||
| return err | ||
| } | ||
| return ms.taskGenerator.GenerateHistoryReplicationTasks( | ||
| currentBranchToken, | ||
| events, | ||
| ) | ||
| } | ||
|
|
||
| func (ms *MutableStateImpl) syncActivityToReplicationTask( | ||
| now time.Time, | ||
| transactionPolicy TransactionPolicy, | ||
| ) []tasks.Task { | ||
|
|
||
| if transactionPolicy == TransactionPolicyPassive || | ||
| !ms.canReplicateEvents() { | ||
| switch transactionPolicy { | ||
| case TransactionPolicyActive: | ||
| if ms.generateReplicationTask() { | ||
| return convertSyncActivityInfos( | ||
| now, | ||
| definition.NewWorkflowKey( | ||
| ms.executionInfo.NamespaceId, | ||
| ms.executionInfo.WorkflowId, | ||
| ms.executionState.RunId, | ||
| ), | ||
| ms.pendingActivityInfoIDs, | ||
| ms.syncActivityTasks, | ||
| ) | ||
| } | ||
| return nil | ||
| case TransactionPolicyPassive: | ||
| return emptyTasks | ||
| default: | ||
| panic(fmt.Sprintf("unknown transaction policy: %v", transactionPolicy)) | ||
| } | ||
|
|
||
| return convertSyncActivityInfos( | ||
| now, | ||
| definition.NewWorkflowKey( | ||
| ms.executionInfo.NamespaceId, | ||
| ms.executionInfo.WorkflowId, | ||
| ms.executionState.RunId, | ||
| ), | ||
| ms.pendingActivityInfoIDs, | ||
| ms.syncActivityTasks, | ||
| ) | ||
| } | ||
|
|
||
| func (ms *MutableStateImpl) updatePendingEventIDs( | ||
|
|
@@ -4305,10 +4306,6 @@ func (ms *MutableStateImpl) updateWithLastWriteEvent( | |
| return nil | ||
| } | ||
|
|
||
| func (ms *MutableStateImpl) canReplicateEvents() bool { | ||
| return ms.namespaceEntry.ReplicationPolicy() == namespace.ReplicationPolicyMultiCluster | ||
| } | ||
|
|
||
| // validateNoEventsAfterWorkflowFinish perform check on history event batch | ||
| // NOTE: do not apply this check on every batch, since transient | ||
| // workflow task && workflow finish will be broken (the first batch) | ||
|
|
@@ -4464,23 +4461,24 @@ func (ms *MutableStateImpl) startTransactionHandleWorkflowTaskFailover() (bool, | |
| func (ms *MutableStateImpl) closeTransactionWithPolicyCheck( | ||
| transactionPolicy TransactionPolicy, | ||
| ) error { | ||
|
|
||
| if transactionPolicy == TransactionPolicyPassive || | ||
| !ms.canReplicateEvents() { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This line is the real change. |
||
| switch transactionPolicy { | ||
| case TransactionPolicyActive: | ||
| // Cannot use ms.namespaceEntry.ActiveClusterName() because currentVersion may be updated during this transaction in | ||
| // passive cluster. For example: if passive cluster sees conflict and decided to terminate this workflow. The | ||
| // currentVersion on mutable state would be updated to point to last write version which is current (passive) cluster. | ||
| activeCluster := ms.clusterMetadata.ClusterNameForFailoverVersion(ms.namespaceEntry.IsGlobalNamespace(), ms.GetCurrentVersion()) | ||
| currentCluster := ms.clusterMetadata.GetCurrentClusterName() | ||
|
|
||
| if activeCluster != currentCluster { | ||
| namespaceID := ms.GetExecutionInfo().NamespaceId | ||
| return serviceerror.NewNamespaceNotActive(namespaceID, currentCluster, activeCluster) | ||
| } | ||
| return nil | ||
| case TransactionPolicyPassive: | ||
| return nil | ||
| default: | ||
| panic(fmt.Sprintf("unknown transaction policy: %v", transactionPolicy)) | ||
| } | ||
|
|
||
| // Cannot use ms.namespaceEntry.ActiveClusterName() because currentVersion may be updated during this transaction in | ||
| // passive cluster. For example: if passive cluster sees conflict and decided to terminate this workflow. The | ||
| // currentVersion on mutable state would be updated to point to last write version which is current (passive) cluster. | ||
| activeCluster := ms.clusterMetadata.ClusterNameForFailoverVersion(ms.namespaceEntry.IsGlobalNamespace(), ms.GetCurrentVersion()) | ||
| currentCluster := ms.clusterMetadata.GetCurrentClusterName() | ||
|
|
||
| if activeCluster != currentCluster { | ||
| namespaceID := ms.GetExecutionInfo().NamespaceId | ||
| return serviceerror.NewNamespaceNotActive(namespaceID, currentCluster, activeCluster) | ||
| } | ||
| return nil | ||
| } | ||
|
|
||
| func (ms *MutableStateImpl) closeTransactionHandleBufferedEventsLimit( | ||
|
|
@@ -4559,17 +4557,24 @@ func (ms *MutableStateImpl) closeTransactionHandleWorkflowReset( | |
| func (ms *MutableStateImpl) closeTransactionHandleActivityUserTimerTasks( | ||
| transactionPolicy TransactionPolicy, | ||
| ) error { | ||
|
|
||
| if transactionPolicy == TransactionPolicyPassive || | ||
| !ms.IsWorkflowExecutionRunning() { | ||
| switch transactionPolicy { | ||
| case TransactionPolicyActive: | ||
| if !ms.IsWorkflowExecutionRunning() { | ||
| return nil | ||
| } | ||
| if err := ms.taskGenerator.GenerateActivityTimerTasks(); err != nil { | ||
| return err | ||
| } | ||
| return ms.taskGenerator.GenerateUserTimerTasks() | ||
| case TransactionPolicyPassive: | ||
| return nil | ||
| default: | ||
| panic(fmt.Sprintf("unknown transaction policy: %v", transactionPolicy)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Return an internal error instead?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It is better to fail fast here, e.g. panic. I would consider returning an error when …
There are only 2 policies here, so there is no safety concern. |
||
| } | ||
| } | ||
|
|
||
| if err := ms.taskGenerator.GenerateActivityTimerTasks(); err != nil { | ||
| return err | ||
| } | ||
|
|
||
| return ms.taskGenerator.GenerateUserTimerTasks() | ||
| func (ms *MutableStateImpl) generateReplicationTask() bool { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is the same as the canReplicateEvents method. |
||
| return len(ms.namespaceEntry.ClusterNames()) > 1 | ||
| } | ||
|
|
||
| func (ms *MutableStateImpl) checkMutability( | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this assumption is broken
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we just reuse this one
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what is the difference?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the name canReplicateEvents makes sense, as it returns a boolean. But it is up to you.