Skip to content

Commit 7f44f34

Browse files
authored
Fixes MySQL persistence for workflow reset (#713)
After the 2DC removal, resetting a workflow triggers a slightly different code path in the MySQL persistence layer. When AppendHistoryNodes is called with IsNewBranch, it adds both the history_tree metadata and the history_node row. This caused a problem because the history_tree row had already been added when the fork was called, so adding it again failed with a duplicate-key error. The history_tree insert used by AppendHistoryNodes now upserts instead. Another issue was that create workflow execution was never expected to be called with `CreateWorkflowModeContinueAsNew` mode. With NDC, workflow reset is now implemented in a similar fashion to continue-as-new, so the check that disallowed `CreateWorkflowModeContinueAsNew` on create execution has been removed and appropriate validation has been added.
1 parent 02ab64c commit 7f44f34

File tree

5 files changed

+234
-12
lines changed

5 files changed

+234
-12
lines changed

.vscode/launch.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
{
22
"version": "0.2.0",
33
"configurations": [
4-
54
{
65
"name": "Debug Server",
76
"type": "go",
@@ -26,6 +25,19 @@
2625
"start",
2726
]
2827
},
28+
{
29+
"name": "Debug Server with MySql",
30+
"type": "go",
31+
"request": "launch",
32+
"mode": "debug",
33+
"program": "${workspaceFolder}/cmd/server",
34+
"cwd": "${workspaceFolder}",
35+
"args": [
36+
"--env",
37+
"development_mysql",
38+
"start",
39+
]
40+
},
2941
{
3042
"name": "Debug CLI Namespace Describe",
3143
"type": "go",

common/persistence/sql/sqlExecutionManager.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,6 @@ func (m *sqlExecutionManager) createWorkflowExecutionTx(
124124
return nil, err
125125
}
126126

127-
switch request.Mode {
128-
case p.CreateWorkflowModeContinueAsNew:
129-
// cannot create workflow with continue as new mode
130-
return nil, serviceerror.NewInternal("CreateWorkflowExecution: operation failed, encounter invalid CreateWorkflowModeContinueAsNew")
131-
}
132-
133127
var err error
134128
var row *sqlplugin.CurrentExecutionsRow
135129
if row, err = lockCurrentExecutionIfExists(tx, m.shardID, namespaceID, workflowID); err != nil {
@@ -180,6 +174,16 @@ func (m *sqlExecutionManager) createWorkflowExecutionTx(
180174
return nil, err
181175
}
182176

177+
case p.CreateWorkflowModeContinueAsNew:
178+
runIDStr := row.RunID.String()
179+
if runIDStr != request.PreviousRunID {
180+
return nil, &p.CurrentWorkflowConditionFailedError{
181+
Msg: fmt.Sprintf("Workflow execution creation condition failed. WorkflowId: %v, "+
182+
"RunId: %v, PreviousRunId: %v",
183+
workflowID, runIDStr, request.PreviousRunID),
184+
}
185+
}
186+
183187
default:
184188
return nil, serviceerror.NewInternal(fmt.Sprintf("CreteWorkflowExecution: unknown mode: %v", request.Mode))
185189
}

common/persistence/sql/sqlHistoryManager.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ func (m *sqlHistoryV2Manager) AppendHistoryNodes(
123123
if rowsAffected != 1 {
124124
return fmt.Errorf("expected 1 row to be affected for node table, got %v", rowsAffected)
125125
}
126+
126127
result, err = tx.InsertIntoHistoryTree(treeRow)
127128
if err != nil {
128129
return err
@@ -131,8 +132,8 @@ func (m *sqlHistoryV2Manager) AppendHistoryNodes(
131132
if err != nil {
132133
return err
133134
}
134-
if rowsAffected != 1 {
135-
return fmt.Errorf("expected 1 row to be affected for tree table, got %v", rowsAffected)
135+
if !(rowsAffected == 1 || rowsAffected == 2) {
136+
return fmt.Errorf("expected 1 or 2 rows to be affected for tree table as we allow upserts, got %v", rowsAffected)
136137
}
137138
return nil
138139
})
@@ -355,6 +356,7 @@ func (m *sqlHistoryV2Manager) ForkHistoryBranch(
355356
Data: blob.Data,
356357
DataEncoding: blob.Encoding.String(),
357358
}
359+
358360
result, err := m.db.InsertIntoHistoryTree(row)
359361
if err != nil {
360362
return nil, err

common/persistence/sql/sqlplugin/mysql/events.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,11 @@ const (
4242
deleteHistoryNodesQuery = `DELETE FROM history_node WHERE shard_id = ? AND tree_id = ? AND branch_id = ? AND node_id >= ? `
4343

4444
// below are templates for history_tree table
45-
addHistoryTreeQuery = `INSERT INTO history_tree (` +
45+
upsertHistoryTreeQuery = `INSERT INTO history_tree (` +
4646
`shard_id, tree_id, branch_id, data, data_encoding) ` +
47-
`VALUES (:shard_id, :tree_id, :branch_id, :data, :data_encoding) `
47+
`VALUES (:shard_id, :tree_id, :branch_id, :data, :data_encoding) ` +
48+
`ON DUPLICATE KEY UPDATE ` +
49+
`data=VALUES(data), data_encoding=VALUES(data_encoding)`
4850

4951
getHistoryTreeQuery = `SELECT branch_id, data, data_encoding FROM history_tree WHERE shard_id = ? AND tree_id = ? `
5052

@@ -81,7 +83,7 @@ func (mdb *db) DeleteFromHistoryNode(filter *sqlplugin.HistoryNodeFilter) (sql.R
8183

8284
// InsertIntoHistoryTree inserts a row into history_tree table
8385
func (mdb *db) InsertIntoHistoryTree(row *sqlplugin.HistoryTreeRow) (sql.Result, error) {
84-
return mdb.conn.NamedExec(addHistoryTreeQuery, row)
86+
return mdb.conn.NamedExec(upsertHistoryTreeQuery, row)
8587
}
8688

8789
// SelectFromHistoryTree reads one or more rows from history_tree table

host/resetworkflow_test.go

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
// The MIT License
2+
//
3+
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
4+
//
5+
// Copyright (c) 2020 Uber Technologies, Inc.
6+
//
7+
// Permission is hereby granted, free of charge, to any person obtaining a copy
8+
// of this software and associated documentation files (the "Software"), to deal
9+
// in the Software without restriction, including without limitation the rights
10+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11+
// copies of the Software, and to permit persons to whom the Software is
12+
// furnished to do so, subject to the following conditions:
13+
//
14+
// The above copyright notice and this permission notice shall be included in
15+
// all copies or substantial portions of the Software.
16+
//
17+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23+
// THE SOFTWARE.
24+
25+
package host
26+
27+
import (
28+
"bytes"
29+
"encoding/binary"
30+
"strconv"
31+
"time"
32+
33+
"github.com/pborman/uuid"
34+
commandpb "go.temporal.io/api/command/v1"
35+
commonpb "go.temporal.io/api/common/v1"
36+
enumspb "go.temporal.io/api/enums/v1"
37+
historypb "go.temporal.io/api/history/v1"
38+
taskqueuepb "go.temporal.io/api/taskqueue/v1"
39+
"go.temporal.io/api/workflowservice/v1"
40+
41+
"go.temporal.io/server/common/log/tag"
42+
"go.temporal.io/server/common/payloads"
43+
"go.temporal.io/server/common/primitives/timestamp"
44+
)
45+
46+
func (s *integrationSuite) TestResetWorkflow() {
47+
id := "integration-reset-workflow-test"
48+
wt := "integration-reset-workflow-test-type"
49+
tq := "integration-reset-workflow-test-taskqueue"
50+
identity := "worker1"
51+
52+
workflowType := &commonpb.WorkflowType{Name: wt}
53+
54+
taskQueue := &taskqueuepb.TaskQueue{Name: tq}
55+
56+
// Start workflow execution
57+
request := &workflowservice.StartWorkflowExecutionRequest{
58+
RequestId: uuid.New(),
59+
Namespace: s.namespace,
60+
WorkflowId: id,
61+
WorkflowType: workflowType,
62+
TaskQueue: taskQueue,
63+
Input: nil,
64+
WorkflowRunTimeout: timestamp.DurationPtr(100 * time.Second),
65+
WorkflowTaskTimeout: timestamp.DurationPtr(1 * time.Second),
66+
Identity: identity,
67+
}
68+
69+
we, err0 := s.engine.StartWorkflowExecution(NewContext(), request)
70+
s.NoError(err0)
71+
72+
s.Logger.Info("StartWorkflowExecution", tag.WorkflowRunID(we.RunId))
73+
74+
// workflow logic
75+
workflowComplete := false
76+
activityData := int32(1)
77+
activityCount := 3
78+
isFirstTaskProcessed := false
79+
isSecondTaskProcessed := false
80+
var firstActivityCompletionEvent *historypb.HistoryEvent
81+
wtHandler := func(execution *commonpb.WorkflowExecution, wt *commonpb.WorkflowType,
82+
previousStartedEventID, startedEventID int64, history *historypb.History) ([]*commandpb.Command, error) {
83+
84+
if !isFirstTaskProcessed {
85+
// Schedule 3 activities on first workflow task
86+
isFirstTaskProcessed = true
87+
buf := new(bytes.Buffer)
88+
s.Nil(binary.Write(buf, binary.LittleEndian, activityData))
89+
90+
var scheduleActivityCommands []*commandpb.Command
91+
for i := 1; i <= activityCount; i++ {
92+
scheduleActivityCommands = append(scheduleActivityCommands, &commandpb.Command{
93+
CommandType: enumspb.COMMAND_TYPE_SCHEDULE_ACTIVITY_TASK,
94+
Attributes: &commandpb.Command_ScheduleActivityTaskCommandAttributes{ScheduleActivityTaskCommandAttributes: &commandpb.ScheduleActivityTaskCommandAttributes{
95+
ActivityId: strconv.Itoa(i),
96+
ActivityType: &commonpb.ActivityType{Name: "ResetActivity"},
97+
TaskQueue: &taskqueuepb.TaskQueue{Name: tq},
98+
Input: payloads.EncodeBytes(buf.Bytes()),
99+
ScheduleToCloseTimeout: timestamp.DurationPtr(100 * time.Second),
100+
ScheduleToStartTimeout: timestamp.DurationPtr(2 * time.Second),
101+
StartToCloseTimeout: timestamp.DurationPtr(50 * time.Second),
102+
HeartbeatTimeout: timestamp.DurationPtr(5 * time.Second),
103+
}},
104+
})
105+
}
106+
107+
return scheduleActivityCommands, nil
108+
} else if !isSecondTaskProcessed {
109+
// Confirm one activity completion on second workflow task
110+
isSecondTaskProcessed = true
111+
for _, event := range history.Events[previousStartedEventID:] {
112+
if event.GetEventType() == enumspb.EVENT_TYPE_ACTIVITY_TASK_COMPLETED {
113+
firstActivityCompletionEvent = event
114+
return []*commandpb.Command{}, nil
115+
}
116+
}
117+
}
118+
119+
// Complete workflow after reset
120+
workflowComplete = true
121+
return []*commandpb.Command{{
122+
CommandType: enumspb.COMMAND_TYPE_COMPLETE_WORKFLOW_EXECUTION,
123+
Attributes: &commandpb.Command_CompleteWorkflowExecutionCommandAttributes{CompleteWorkflowExecutionCommandAttributes: &commandpb.CompleteWorkflowExecutionCommandAttributes{
124+
Result: payloads.EncodeString("Done"),
125+
}},
126+
}}, nil
127+
128+
}
129+
130+
// activity handler
131+
atHandler := func(execution *commonpb.WorkflowExecution, activityType *commonpb.ActivityType,
132+
activityID string, input *commonpb.Payloads, taskToken []byte) (*commonpb.Payloads, bool, error) {
133+
134+
return payloads.EncodeString("Activity Result"), false, nil
135+
}
136+
137+
poller := &TaskPoller{
138+
Engine: s.engine,
139+
Namespace: s.namespace,
140+
TaskQueue: taskQueue,
141+
Identity: identity,
142+
WorkflowTaskHandler: wtHandler,
143+
ActivityTaskHandler: atHandler,
144+
Logger: s.Logger,
145+
T: s.T(),
146+
}
147+
148+
// Process first workflow task to schedule activities
149+
_, err := poller.PollAndProcessWorkflowTask(false, false)
150+
s.Logger.Info("PollAndProcessWorkflowTask", tag.Error(err))
151+
s.NoError(err)
152+
153+
// Process one activity task which also creates second workflow task
154+
err = poller.PollAndProcessActivityTask(false)
155+
s.Logger.Info("Poll and process first activity", tag.Error(err))
156+
s.NoError(err)
157+
158+
// Process second workflow task which checks activity completion
159+
_, err = poller.PollAndProcessWorkflowTask(false, false)
160+
s.Logger.Info("Poll and process second workflow task", tag.Error(err))
161+
s.NoError(err)
162+
163+
// Find reset point (last completed workflow task)
164+
events := s.getHistory(s.namespace, &commonpb.WorkflowExecution{
165+
WorkflowId: id,
166+
RunId: we.GetRunId(),
167+
})
168+
var lastWorkflowTask *historypb.HistoryEvent
169+
for _, event := range events {
170+
if event.GetEventType() == enumspb.EVENT_TYPE_WORKFLOW_TASK_COMPLETED {
171+
lastWorkflowTask = event
172+
}
173+
}
174+
175+
// Reset workflow execution
176+
_, err = s.engine.ResetWorkflowExecution(NewContext(), &workflowservice.ResetWorkflowExecutionRequest{
177+
Namespace: s.namespace,
178+
WorkflowExecution: &commonpb.WorkflowExecution{
179+
WorkflowId: id,
180+
RunId: we.RunId,
181+
},
182+
Reason: "reset execution from test",
183+
WorkflowTaskFinishEventId: lastWorkflowTask.GetEventId(),
184+
RequestId: uuid.New(),
185+
})
186+
s.NoError(err)
187+
188+
err = poller.PollAndProcessActivityTask(false)
189+
s.Logger.Info("Poll and process second activity", tag.Error(err))
190+
s.NoError(err)
191+
192+
err = poller.PollAndProcessActivityTask(false)
193+
s.Logger.Info("Poll and process third activity", tag.Error(err))
194+
s.NoError(err)
195+
196+
_, err = poller.PollAndProcessWorkflowTask(false, false)
197+
s.Logger.Info("Poll and process final workflow task", tag.Error(err))
198+
s.NoError(err)
199+
200+
s.NotNil(firstActivityCompletionEvent)
201+
s.True(workflowComplete)
202+
}

0 commit comments

Comments
 (0)