temporalio
diff --git a/‎cmd/cli/run_scenario.go‎
Lines changed: 26 additions & 4 deletions b/‎cmd/cli/run_scenario.go‎
Lines changed: 26 additions & 4 deletions
diff --git a/‎loadgen/generic_executor.go‎
Lines changed: 109 additions & 3 deletions b/‎loadgen/generic_executor.go‎
Lines changed: 109 additions & 3 deletions
diff --git a/‎loadgen/generic_executor_test.go‎
Lines changed: 31 additions & 0 deletions b/‎loadgen/generic_executor_test.go‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎loadgen/kitchen_sink_executor.go‎
Lines changed: 19 additions & 18 deletions b/‎loadgen/kitchen_sink_executor.go‎
Lines changed: 19 additions & 18 deletions
diff --git a/‎loadgen/scenario.go‎
Lines changed: 20 additions & 0 deletions b/‎loadgen/scenario.go‎
Lines changed: 20 additions & 0 deletions
@@ -2,6 +2,7 @@ package cli
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"os"
 	"strings"
@@ -57,6 +58,7 @@ type scenarioRunConfig struct {
 	timeout                       time.Duration
 	doNotRegisterSearchAttributes bool
 	ignoreAlreadyStarted          bool
+	continueOnError               bool
 }
 
 func (r *scenarioRunner) addCLIFlags(fs *pflag.FlagSet) {
@@ -84,6 +86,8 @@ func (r *scenarioRunConfig) addCLIFlags(fs *pflag.FlagSet) {
 			"If the search attributes are not registed by the scenario they must be registered through some other method")
 	fs.BoolVar(&r.ignoreAlreadyStarted, "ignore-already-started", false,
 		"Ignore if a workflow with the same ID already exists. A Scenario may choose to override this behavior.")
+	fs.BoolVar(&r.continueOnError, "continue-on-error", false,
+		"Continue running iterations even when an iteration fails after all retries are exhausted")
 }
 
 func (r *scenarioRunner) preRun() {
@@ -160,15 +164,33 @@ func (r *scenarioRunner) run(ctx context.Context) error {
 			Timeout:                       r.timeout,
 			DoNotRegisterSearchAttributes: r.doNotRegisterSearchAttributes,
 			IgnoreAlreadyStarted:          r.ignoreAlreadyStarted,
+			ContinueOnError:               r.continueOnError,
 		},
 		ScenarioOptions: scenarioOptions,
 		Namespace:       r.clientOptions.Namespace,
 		RootPath:        repoDir,
 	}
 	executor := scenario.ExecutorFn()
-	err = executor.Run(ctx, scenarioInfo)
-	if err != nil {
-		return fmt.Errorf("failed scenario: %w", err)
+
+	// Phase 1: Run the scenario
+	r.logger.Info("Running scenario")
+	scenarioErr := executor.Run(ctx, scenarioInfo)
+
+	// Collect all errors
+	var allErrors []error
+	if scenarioErr != nil {
+		allErrors = append(allErrors, fmt.Errorf("scenario execution: %w", scenarioErr))
 	}
-	return nil
+
+	// Phase 2: Run verifications
+	if verifiable, ok := executor.(loadgen.Verifyable); ok {
+		r.logger.Info("Running verifications")
+		verifyErrs := verifiable.VerifyRun(ctx, scenarioInfo)
+		for _, err := range verifyErrs {
+			allErrors = append(allErrors, fmt.Errorf("verification: %w", err))
+		}
+	}
+
+	// Aggregate all errors
+	return errors.Join(allErrors...)
 }
@@ -2,16 +2,27 @@ package loadgen
 
 import (
 	"context"
+	"errors"
 	"fmt"
+	"sync"
 	"time"
 
+	"go.temporal.io/api/serviceerror"
 	"go.temporal.io/sdk/client"
 	"go.uber.org/zap"
 )
 
+// GenericExecutor is an Executor whose Execute callback performs a single iteration of a scenario.
 type GenericExecutor struct {
 	// Function to execute a single iteration of this scenario
 	Execute func(context.Context, *Run) error
+
+	// WorkflowCompletionChecker is optional - when set, enables verification of workflow completions.
+	// When nil (default), verification is disabled.
+	WorkflowCompletionChecker *WorkflowCompletionChecker
+
+	// State management: mu guards state, which stays nil until Run or LoadState assigns it.
+	mu    sync.Mutex
+	state *ExecutorState
 }
 
 type genericRun struct {
@@ -24,13 +35,84 @@ type genericRun struct {
 }
 
+// Run makes sure the executor has a state with a start time, then builds a run for info and executes it.
+// An already-set StartedAt (for example from a state installed by LoadState) is preserved.
 func (g *GenericExecutor) Run(ctx context.Context, info ScenarioInfo) error {
+	g.mu.Lock()
+	if g.state == nil {
+		g.state = &ExecutorState{}
+	}
+	if g.state.StartedAt.IsZero() {
+		g.state.StartedAt = time.Now()
+	}
+	g.mu.Unlock()
+
 	r, err := g.newRun(info)
 	if err != nil {
 		return err
 	}
 	return r.Run(ctx)
 }
 
+// RecordCompletion counts one more successful iteration and stamps the time it finished.
+// It does nothing when no state has been initialized yet.
+func (g *GenericExecutor) RecordCompletion() {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	st := g.state
+	if st == nil {
+		return
+	}
+	st.CompletedIterations++
+	st.LastCompletedAt = time.Now()
+}
+
+// RecordError appends err's message to the state's iteration errors.
+// A nil err, or a call made before any state exists, is ignored.
+func (g *GenericExecutor) RecordError(err error) {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	if err == nil || g.state == nil {
+		return
+	}
+	g.state.IterationErrors = append(g.state.IterationErrors, err.Error())
+}
+
+// VerifyRun runs the optional WorkflowCompletionChecker against the executor's state.
+// It returns nil when no checker is configured or the check passes, and a
+// single-element slice holding the checker's error otherwise.
+func (g *GenericExecutor) VerifyRun(ctx context.Context, info ScenarioInfo) []error {
+	// Read both fields under the lock, but call the checker without holding it.
+	g.mu.Lock()
+	checker, state := g.WorkflowCompletionChecker, g.state
+	g.mu.Unlock()
+
+	if checker == nil {
+		return nil
+	}
+	err := checker.Verify(ctx, info, state)
+	if err == nil {
+		return nil
+	}
+	return []error{err}
+}
+
+// GetState returns a snapshot of the current state, or the zero ExecutorState if
+// no state has been initialized. The snapshot owns its IterationErrors slice, so
+// changes made through it never reach the executor's internal state.
+func (g *GenericExecutor) GetState() ExecutorState {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	if g.state == nil {
+		return ExecutorState{}
+	}
+	snapshot := *g.state
+	// A plain struct copy would share the slice's backing array with g.state.
+	// Appending onto a nil slice copies the elements; an empty slice comes back nil.
+	snapshot.IterationErrors = append([]string(nil), g.state.IterationErrors...)
+	return snapshot
+}
+
+// Snapshot returns the value produced by GetState as an untyped value.
+func (g *GenericExecutor) Snapshot() any {
+	var snap any = g.GetState()
+	return snap
+}
+
+// LoadState asks loader to populate a fresh ExecutorState and, on success,
+// installs it as the executor's state, replacing any previous one. If loader
+// fails, its error is returned and the existing state is left untouched.
+func (g *GenericExecutor) LoadState(loader func(any) error) error {
+	loaded := new(ExecutorState)
+	if err := loader(loaded); err != nil {
+		return err
+	}
+
+	g.mu.Lock()
+	defer g.mu.Unlock()
+	g.state = loaded
+	return nil
+}
+
 func (g *GenericExecutor) newRun(info ScenarioInfo) (*genericRun, error) {
 	info.Configuration.ApplyDefaults()
 	if err := info.Configuration.Validate(); err != nil {
@@ -83,7 +165,12 @@ func (g *genericRun) Run(ctx context.Context) error {
 		case err := <-doneCh:
 			currentlyRunning--
 			if err != nil {
-				runErr = err
+				if g.config.ContinueOnError {
+					g.logger.Warnf("Iteration failed but continuing due to --continue-on-error: %v", err)
+					g.executor.RecordError(err)
+				} else {
+					runErr = err
+				}
 			}
 		case <-contextToWaitOn.Done():
 		}
@@ -125,6 +212,7 @@ func (g *genericRun) Run(ctx context.Context) error {
 		run := g.info.NewRun(i + 1)
 		go func() {
 			var err error
+			var shouldRecordCompletion bool
 			iterStart := time.Now()
 
 			defer func() {
@@ -133,8 +221,11 @@ func (g *genericRun) Run(ctx context.Context) error {
 				select {
 				case <-ctx.Done():
 				case doneCh <- err:
-					if err == nil && g.config.OnCompletion != nil {
-						g.config.OnCompletion(ctx, run)
+					if err == nil && shouldRecordCompletion {
+						g.executor.RecordCompletion()
+						if g.config.OnCompletion != nil {
+							g.config.OnCompletion(ctx, run)
+						}
 					}
 				}
 			}()
@@ -145,7 +236,22 @@ func (g *genericRun) Run(ctx context.Context) error {
 				if err != nil && g.config.HandleExecuteError != nil {
 					err = g.config.HandleExecuteError(ctx, run, err)
 				}
+
+				// Check if workflow was already started
+				if err != nil {
+					var alreadyStartedErr *serviceerror.WorkflowExecutionAlreadyStarted
+					if errors.As(err, &alreadyStartedErr) {
+						if g.config.IgnoreAlreadyStarted {
+							g.logger.Debugf("Workflow already started, skipping iteration %v", run.Iteration)
+							err = nil
+							shouldRecordCompletion = false
+							break
+						}
+					}
+				}
+
 				if err == nil {
+					shouldRecordCompletion = true
 					break
 				}
 
 
@@ -258,3 +258,34 @@ func TestExecutorRetriesLimit(t *testing.T) {
 		require.Equal(t, []int{1, 1, 1, 1, 1}, totalTracker.seen, "expected 5 attempts")
 	})
 }
+
+// TestExecutorContinuesOnError checks that with ContinueOnError set, failing
+// iterations are recorded in the executor state instead of aborting the run.
+func TestExecutorContinuesOnError(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		tracker := newIterationTracker()
+		// Iterations 2 and 4 always fail; the other three succeed.
+		executor := &GenericExecutor{
+			Execute: func(ctx context.Context, run *Run) error {
+				tracker.track(run.Iteration)
+				switch run.Iteration {
+				case 2, 4:
+					return errors.New("deliberate failure")
+				default:
+					return nil
+				}
+			},
+		}
+
+		cfg := RunConfiguration{
+			Iterations:      5,
+			ContinueOnError: true,
+		}
+		err := execute(executor, cfg)
+		require.NoError(t, err, "executor should complete when ContinueOnError is true")
+		tracker.assertSeen(t, 5)
+
+		state := executor.GetState()
+		require.Equal(t, 3, state.CompletedIterations)
+		require.Len(t, state.IterationErrors, 2)
+		for _, msg := range state.IterationErrors {
+			require.Contains(t, msg, "deliberate failure")
+		}
+	})
+}
@@ -8,6 +8,8 @@ import (
 )
 
 type KitchenSinkExecutor struct {
+	GenericExecutor
+
 	TestInput *kitchensink.TestInput
 
 	// Called once on start
@@ -18,29 +20,28 @@ type KitchenSinkExecutor struct {
 	UpdateWorkflowOptions func(context.Context, *Run, *KitchenSinkWorkflowOptions) error
 }
 
-func (k KitchenSinkExecutor) Run(ctx context.Context, info ScenarioInfo) error {
+// Run prepares the test input (when PrepareTestInput is set), installs the per-iteration Execute
+// callback on the embedded GenericExecutor, and then delegates to GenericExecutor.Run.
+// The pointer receiver keeps the embedded executor's mutex and state from being copied on each call.
+func (k *KitchenSinkExecutor) Run(ctx context.Context, info ScenarioInfo) error {
 	if k.PrepareTestInput != nil {
 		if err := k.PrepareTestInput(ctx, info, k.TestInput); err != nil {
 			return err
 		}
 	}
-	// Create generic executor and run it
-	ge := &GenericExecutor{
-		Execute: func(ctx context.Context, run *Run) error {
-			options := run.DefaultKitchenSinkWorkflowOptions()
-			testInputClone, ok := proto.Clone(k.TestInput).(*kitchensink.TestInput)
-			if !ok {
-				panic("failed to clone test input")
-			}
-			options.Params = testInputClone
-			if k.UpdateWorkflowOptions != nil {
-				err := k.UpdateWorkflowOptions(ctx, run, &options)
-				if err != nil {
-					return err
-				}
+
+	k.GenericExecutor.Execute = func(ctx context.Context, run *Run) error {
+		options := run.DefaultKitchenSinkWorkflowOptions()
+		// Clone so every Execute call works on its own copy of the shared TestInput.
+		testInputClone, ok := proto.Clone(k.TestInput).(*kitchensink.TestInput)
+		if !ok {
+			panic("failed to clone test input")
+		}
+		options.Params = testInputClone
+		if k.UpdateWorkflowOptions != nil {
+			err := k.UpdateWorkflowOptions(ctx, run, &options)
+			if err != nil {
+				return err
 			}
-			return run.ExecuteKitchenSinkWorkflow(ctx, &options)
-		},
+		}
+		return run.ExecuteKitchenSinkWorkflow(ctx, &options)
 	}
-	return ge.Run(ctx, info)
+
+	return k.GenericExecutor.Run(ctx, info)
 }
@@ -30,6 +30,17 @@ type Executor interface {
 	Run(context.Context, ScenarioInfo) error
 }
 
+// ExecutorState is the progress record kept by GenericExecutor; its fields carry JSON tags for serialization.
+type ExecutorState struct {
+	// StartedAt is the timestamp when the executor run started.
+	StartedAt time.Time `json:"startedAt"`
+	// CompletedIterations tracks the number of successfully completed iterations.
+	CompletedIterations int `json:"completedIterations"`
+	// LastCompletedAt is the timestamp of the last successfully completed iteration.
+	LastCompletedAt time.Time `json:"lastCompletedAt"`
+	// IterationErrors holds the messages of iteration errors recorded while running with
+	// ContinueOnError (for debugging/resumption).
+	IterationErrors []string `json:"iterationErrors,omitempty"`
+}
+
 // Optional interface that can be implemented by an [Executor] to allow it to be resumable.
 type Resumable interface {
 	// LoadState loads a snapshot into the executor's internal state.
@@ -53,6 +64,12 @@ type Configurable interface {
 	Configure(ScenarioInfo) error
 }
 
+// Verifyable is an optional interface that executors can implement to perform verifications after Run() completes.
+// The scenario runner calls VerifyRun even when Run returned an error.
+type Verifyable interface {
+	// VerifyRun performs post-execution verifications and returns a list of errors.
+	// A nil or empty result means every verification passed.
+	VerifyRun(context.Context, ScenarioInfo) []error
+}
+
 // ExecutorFunc is an [Executor] implementation for a function
 type ExecutorFunc func(context.Context, ScenarioInfo) error
 
@@ -204,6 +221,9 @@ type RunConfiguration struct {
 	// IgnoreAlreadyStarted, if set, will not error when a workflow with the same ID already exists.
 	// Default is false.
 	IgnoreAlreadyStarted bool
+	// ContinueOnError, if set, will continue running iterations even after an iteration fails
+	// (after all retries are exhausted). Default is false.
+	ContinueOnError bool
 	// OnCompletion, if set, is invoked after each successful iteration completes.
 	OnCompletion func(context.Context, *Run)
 	// HandleExecuteError, if set, is called when Execute returns an error, allowing transformation of errors.