temporalio
diff --git a/‎.github/workflows/docker-images.yml‎
Lines changed: 9 additions & 1 deletion b/‎.github/workflows/docker-images.yml‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎cmd/cli/run_scenario.go‎
Lines changed: 44 additions & 8 deletions b/‎cmd/cli/run_scenario.go‎
Lines changed: 44 additions & 8 deletions
diff --git a/‎loadgen/generic_executor.go‎
Lines changed: 139 additions & 3 deletions b/‎loadgen/generic_executor.go‎
Lines changed: 139 additions & 3 deletions
diff --git a/‎loadgen/generic_executor_test.go‎
Lines changed: 32 additions & 0 deletions b/‎loadgen/generic_executor_test.go‎
Lines changed: 32 additions & 0 deletions
@@ -78,11 +78,19 @@ jobs:
           username: ${{ secrets.DOCKER_USERNAME }}
           password: ${{ secrets.DOCKER_PAT }}
 
+      - name: Extract branch name
+        id: extract_branch
+        run: |
+          # Prefer GITHUB_HEAD_REF when it is set and non-empty; otherwise use
+          # GITHUB_REF with a leading "refs/heads/" removed.
+          BRANCH_NAME="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
+          # Replace every character outside [a-zA-Z0-9._-] with '-'. This also
+          # covers '/', so no separate slash substitution is needed.
+          SANITIZED_BRANCH=$(echo "$BRANCH_NAME" | sed 's/[^a-zA-Z0-9._-]/-/g')
+          # Quote the output file path so it is never word-split or globbed.
+          echo "branch_name=$SANITIZED_BRANCH" >> "$GITHUB_OUTPUT"
+
       - name: Build and push to Docker Hub
         env:
           LANG: ${{ inputs.lang }}
           SDK_VERSION: ${{ inputs.sdk-version || 'checked-out-sdk/' }}
-          IMAGE_TAG_ARGS: ${{ inputs.sdk-repo-ref && format('--image-tag {0}-{1}', inputs.lang, inputs.docker-tag-ext) || ''}}
+          BRANCH_TAG_COMPONENT: ${{ inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name) }}
+          IMAGE_TAG_ARGS: ${{ inputs.sdk-repo-ref && format('--image-tag {0}-{1} --image-tag {2}', inputs.lang, inputs.docker-tag-ext, inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name)) || format('--image-tag {0}', inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name)) }}
           TAG_LATEST_ARGS: ${{ inputs.as-latest && '--tag-as-latest' || ''}}
           LANG_ARGS: ${{ inputs.lang && format('--language {0}', inputs.lang) || '' }}
           VERSION_ARGS: ${{ inputs.sdk-version && format('--version {0}', inputs.sdk-version) || '' }}
 
@@ -2,6 +2,9 @@ package cli
 
 import (
 	"context"
+	"crypto/rand"
+	"encoding/hex"
+	"errors"
 	"fmt"
 	"os"
 	"strings"
@@ -56,7 +59,7 @@ type scenarioRunConfig struct {
 	scenarioOptions               []string
 	timeout                       time.Duration
 	doNotRegisterSearchAttributes bool
-	ignoreAlreadyStarted          bool
+	continueOnError               bool
 }
 
 func (r *scenarioRunner) addCLIFlags(fs *pflag.FlagSet) {
@@ -82,8 +85,9 @@ func (r *scenarioRunConfig) addCLIFlags(fs *pflag.FlagSet) {
 	fs.BoolVar(&r.doNotRegisterSearchAttributes, "do-not-register-search-attributes", false,
 		"Do not register the default search attributes used by scenarios. "+
 			"If the search attributes are not registed by the scenario they must be registered through some other method")
-	fs.BoolVar(&r.ignoreAlreadyStarted, "ignore-already-started", false,
-		"Ignore if a workflow with the same ID already exists. A Scenario may choose to override this behavior.")
+	fs.BoolVar(&r.continueOnError, "continue-on-error", false,
+		"Continue running even when any iterations fail after all retries are exhausted. "+
+			"In case of any errors, Omes will exit nonzero and log the errors.")
 }
 
 func (r *scenarioRunner) preRun() {
@@ -145,9 +149,16 @@ func (r *scenarioRunner) run(ctx context.Context) error {
 		return fmt.Errorf("failed to get root directory: %w", err)
 	}
 
+	// Generate a random execution ID to ensure no two executions with the same RunID collide
+	executionID, err := generateExecutionID()
+	if err != nil {
+		return fmt.Errorf("failed to generate execution ID: %w", err)
+	}
+
 	scenarioInfo := loadgen.ScenarioInfo{
 		ScenarioName:   r.scenario.Scenario,
 		RunID:          r.scenario.RunID,
+		ExecutionID:    executionID,
 		Logger:         r.logger,
 		MetricsHandler: metrics.NewHandler(),
 		Client:         client,
@@ -159,16 +170,41 @@ func (r *scenarioRunner) run(ctx context.Context) error {
 			MaxIterationAttempts:          r.maxIterationAttempts,
 			Timeout:                       r.timeout,
 			DoNotRegisterSearchAttributes: r.doNotRegisterSearchAttributes,
-			IgnoreAlreadyStarted:          r.ignoreAlreadyStarted,
+			ContinueOnError:               r.continueOnError,
 		},
 		ScenarioOptions: scenarioOptions,
 		Namespace:       r.clientOptions.Namespace,
 		RootPath:        repoDir,
 	}
 	executor := scenario.ExecutorFn()
-	err = executor.Run(ctx, scenarioInfo)
-	if err != nil {
-		return fmt.Errorf("failed scenario: %w", err)
+
+	// 1. Run the scenario
+	scenarioErr := executor.Run(ctx, scenarioInfo)
+
+	// Collect all errors
+	var allErrors []error
+	if scenarioErr != nil {
+		allErrors = append(allErrors, fmt.Errorf("scenario execution: %w", scenarioErr))
+	}
+
+	// 2. Run verifications
+	if verifiable, ok := executor.(loadgen.Verifyable); ok {
+		verifyErrs := verifiable.VerifyRun(ctx, scenarioInfo)
+		for _, err := range verifyErrs {
+			allErrors = append(allErrors, fmt.Errorf("post-scenario verification: %w", err))
+		}
+	}
+
+	// Aggregate all errors
+	return errors.Join(allErrors...)
+}
+
+// generateExecutionID returns a random identifier for this particular execution of a
+// scenario: 8 bytes read from crypto/rand, hex-encoded. The randomness makes it very
+// unlikely (not impossible) that two executions sharing the same RunID collide.
+// A non-nil error is returned, with an empty ID, if reading random bytes fails.
+func generateExecutionID() (string, error) {
+	bytes := make([]byte, 8) // 8 bytes = 16 hex characters
+	if _, err := rand.Read(bytes); err != nil {
+		return "", err
 	}
-	return nil
+	return hex.EncodeToString(bytes), nil
 }
@@ -2,16 +2,28 @@ package loadgen
 
 import (
 	"context"
+	"errors"
 	"fmt"
+	"sync"
 	"time"
 
+	"go.temporal.io/api/serviceerror"
 	"go.temporal.io/sdk/client"
 	"go.uber.org/zap"
 )
 
+// skipIterationErr is a sentinel error indicating that the iteration
+// should be skipped and not recorded as a completion or failure.
+// The retry loop substitutes it when Execute fails with a
+// WorkflowExecutionAlreadyStarted service error; the iteration's deferred
+// handler then detects it with errors.Is and reports a nil error instead.
+var skipIterationErr = errors.New("skip iteration")
+
+// GenericExecutor drives a scenario through its Execute callback and keeps
+// bookkeeping about the execution (completions, errors, timestamps).
+// It holds a sync.Mutex by value, so it must be used through a pointer and
+// must not be copied after first use.
 type GenericExecutor struct {
 	// Function to execute a single iteration of this scenario
 	Execute func(context.Context, *Run) error
+
+	// State management. mu guards state and workflowCompletionChecker.
+	// state stays nil until Run or LoadState sets it; workflowCompletionChecker
+	// stays nil until EnableWorkflowCompletionCheck succeeds.
+	mu                        sync.Mutex
+	state                     *ExecutorState
+	workflowCompletionChecker *WorkflowCompletionChecker
 }
 
 type genericRun struct {
@@ -24,13 +36,109 @@ type genericRun struct {
 }
 
+// Run executes the scenario described by info and returns the error from
+// building or running the underlying genericRun.
+//
+// Before running, it prepares the executor state under the lock: a new
+// ExecutorState tagged with info.ExecutionID is created only if none exists
+// yet, and StartedAt is stamped only if it is still the zero time. State that
+// is already present therefore keeps its ExecutionID and start time.
 func (g *GenericExecutor) Run(ctx context.Context, info ScenarioInfo) error {
+	g.mu.Lock()
+	// Create the state lazily so existing state is not overwritten.
+	if g.state == nil {
+		g.state = &ExecutorState{
+			ExecutionID: info.ExecutionID,
+		}
+	}
+	// Only record the start time once.
+	if g.state.StartedAt.IsZero() {
+		g.state.StartedAt = time.Now()
+	}
+	g.mu.Unlock()
+
 	r, err := g.newRun(info)
 	if err != nil {
 		return err
 	}
 	return r.Run(ctx)
 }
 
+// RecordCompletion counts one more completed iteration and stamps the time of
+// that completion. It does nothing when the executor has no state yet.
+func (g *GenericExecutor) RecordCompletion() {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	st := g.state
+	if st == nil {
+		return
+	}
+	st.CompletedIterations++
+	st.LastCompletedAt = time.Now()
+}
+
+// RecordError appends the message of err to the state's list of iteration
+// errors. It does nothing when err is nil or the executor has no state yet.
+func (g *GenericExecutor) RecordError(err error) {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	if g.state == nil || err == nil {
+		return
+	}
+	msg := err.Error()
+	g.state.IterationErrors = append(g.state.IterationErrors, msg)
+}
+
+// VerifyRun runs the workflow completion check, if one was enabled through
+// EnableWorkflowCompletionCheck, against a copy of the executor state taken
+// under the lock.
+//
+// It returns nil when no checker is configured or when verification passes,
+// and a single-element slice holding the checker's error otherwise. If the
+// executor has no state yet, the zero ExecutorState is verified rather than
+// dereferencing a nil pointer; this matches what GetState reports in the same
+// situation. The info parameter is currently unused.
+func (g *GenericExecutor) VerifyRun(ctx context.Context, info ScenarioInfo) []error {
+	g.mu.Lock()
+	checker := g.workflowCompletionChecker
+	var state ExecutorState
+	if g.state != nil {
+		state = *g.state
+	}
+	g.mu.Unlock()
+
+	if checker == nil {
+		return nil
+	}
+	if err := checker.Verify(ctx, state); err != nil {
+		return []error{err}
+	}
+	return nil
+}
+
+// EnableWorkflowCompletionCheck turns on workflow completion verification for this executor.
+//
+// The checker is built by NewWorkflowCompletionChecker from ctx, info and timeout. As
+// documented for this method, that covers registering the required search attributes,
+// and a zero timeout falls back to 30 seconds.
+// expectedWorkflowCount, when non-nil, is installed on the checker to derive the expected
+// number of workflows from the ExecutorState; when nil the checker's default is kept
+// (documented as state.CompletedIterations).
+// If the checker cannot be created, its error is returned and the executor is left unchanged.
+func (g *GenericExecutor) EnableWorkflowCompletionCheck(ctx context.Context, info ScenarioInfo, timeout time.Duration, expectedWorkflowCount func(ExecutorState) int) error {
+	completionChecker, err := NewWorkflowCompletionChecker(ctx, info, timeout)
+	if err != nil {
+		return err
+	}
+	if expectedWorkflowCount != nil {
+		completionChecker.SetExpectedWorkflowCount(expectedWorkflowCount)
+	}
+
+	// Install the fully configured checker under the lock.
+	g.mu.Lock()
+	defer g.mu.Unlock()
+	g.workflowCompletionChecker = completionChecker
+	return nil
+}
+
+// GetState returns a copy of the current state, or the zero ExecutorState when
+// no state has been set yet. The copy is a plain struct copy made under the lock.
+func (g *GenericExecutor) GetState() ExecutorState {
+	g.mu.Lock()
+	defer g.mu.Unlock()
+
+	var snapshot ExecutorState
+	if g.state != nil {
+		snapshot = *g.state
+	}
+	return snapshot
+}
+
+// Snapshot returns the same copy of the state that GetState produces, typed as any.
+func (g *GenericExecutor) Snapshot() any {
+	current := g.GetState()
+	return current
+}
+
+// LoadState replaces the executor's state with one filled in by loader.
+// loader is handed a pointer to a zero-valued ExecutorState to populate. If it
+// returns an error, that error is returned unchanged and the existing state is
+// left untouched.
+func (g *GenericExecutor) LoadState(loader func(any) error) error {
+	restored := new(ExecutorState)
+	if err := loader(restored); err != nil {
+		return err
+	}
+
+	g.mu.Lock()
+	defer g.mu.Unlock()
+	g.state = restored
+	return nil
+}
+
 func (g *GenericExecutor) newRun(info ScenarioInfo) (*genericRun, error) {
 	info.Configuration.ApplyDefaults()
 	if err := info.Configuration.Validate(); err != nil {
@@ -83,7 +191,12 @@ func (g *genericRun) Run(ctx context.Context) error {
 		case err := <-doneCh:
 			currentlyRunning--
 			if err != nil {
-				runErr = err
+				if g.config.ContinueOnError {
+					g.logger.Warnf("Iteration failed but continuing due to --continue-on-error: %v", err)
+					g.executor.RecordError(err)
+				} else {
+					runErr = err
+				}
 			}
 		case <-contextToWaitOn.Done():
 		}
@@ -130,25 +243,48 @@ func (g *genericRun) Run(ctx context.Context) error {
 			defer func() {
 				g.executeTimer.Record(time.Since(iterStart))
 
+				// Check if this is the special "skip iteration" error
+				isSkipIteration := errors.Is(err, skipIterationErr)
+				if isSkipIteration {
+					err = nil // Don't propagate this as an actual error
+				}
+
 				select {
 				case <-ctx.Done():
 				case doneCh <- err:
-					if err == nil && g.config.OnCompletion != nil {
-						g.config.OnCompletion(ctx, run)
+					if err == nil && !isSkipIteration {
+						g.executor.RecordCompletion()
+						if g.config.OnCompletion != nil {
+							g.config.OnCompletion(ctx, run)
+						}
 					}
 				}
 			}()
 
 		retryLoop:
 			for {
 				err = g.executor.Execute(ctx, run)
+
+				// Skip if workflow was already started.
+				if err != nil {
+					var alreadyStartedErr *serviceerror.WorkflowExecutionAlreadyStarted
+					if errors.As(err, &alreadyStartedErr) {
+						g.logger.Debugf("Workflow already started, skipping iteration %v", run.Iteration)
+						err = skipIterationErr
+						break
+					}
+				}
+
+				// If defined, invoke user-defined error handler.
 				if err != nil && g.config.HandleExecuteError != nil {
 					err = g.config.HandleExecuteError(ctx, run, err)
 				}
+
 				if err == nil {
 					break
 				}
 
+				// Attempt to retry.
 				backoff, retry := run.ShouldRetry(err)
 				if retry {
 					err = fmt.Errorf("iteration %v encountered error: %w", run.Iteration, err)
 
@@ -42,6 +42,7 @@ func execute(executor *GenericExecutor, runConfig RunConfiguration) error {
 	info := ScenarioInfo{
 		MetricsHandler: client.MetricsNopHandler,
 		Logger:         logger.Sugar(),
+		ExecutionID:    "test-exec-id",
 		Configuration:  runConfig,
 	}
 	return executor.Run(context.Background(), info)
@@ -258,3 +259,34 @@ func TestExecutorRetriesLimit(t *testing.T) {
 		require.Equal(t, []int{1, 1, 1, 1, 1}, totalTracker.seen, "expected 5 attempts")
 	})
 }
+
+// TestExecutorContinuesOnError checks that with ContinueOnError set, failing
+// iterations do not abort the run: all five iterations are attempted, execute
+// returns no error, and the executor state shows three completions and two
+// recorded errors.
+func TestExecutorContinuesOnError(t *testing.T) {
+	synctest.Test(t, func(t *testing.T) {
+		tracker := newIterationTracker()
+
+		// Iterations 2 and 4 fail; every other iteration succeeds.
+		failSecondAndFourth := func(ctx context.Context, run *Run) error {
+			tracker.track(run.Iteration)
+			switch run.Iteration {
+			case 2, 4:
+				return errors.New("deliberate failure")
+			default:
+				return nil
+			}
+		}
+		executor := &GenericExecutor{Execute: failSecondAndFourth}
+		config := RunConfiguration{
+			Iterations:      5,
+			ContinueOnError: true,
+		}
+
+		runErr := execute(executor, config)
+
+		require.NoError(t, runErr, "executor should complete when ContinueOnError is true")
+		tracker.assertSeen(t, 5)
+
+		finalState := executor.GetState()
+		require.Equal(t, 3, finalState.CompletedIterations)
+		require.Len(t, finalState.IterationErrors, 2)
+		for _, msg := range finalState.IterationErrors {
+			require.Contains(t, msg, "deliberate failure")
+		}
+	})
+}