diff --git a/.github/workflows/docker-images.yml b/.github/workflows/docker-images.yml index 51366e4a..4f1d1798 100644 --- a/.github/workflows/docker-images.yml +++ b/.github/workflows/docker-images.yml @@ -82,11 +82,19 @@ jobs: username: ${{ secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_PAT }} + - name: Extract branch name + id: extract_branch + run: | + BRANCH_NAME="${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" + SANITIZED_BRANCH=$(echo "$BRANCH_NAME" | sed 's/\//-/g' | sed 's/[^a-zA-Z0-9._-]/-/g') + echo "branch_name=$SANITIZED_BRANCH" >> $GITHUB_OUTPUT + - name: Build and push to Docker Hub env: LANG: ${{ inputs.lang }} SDK_VERSION: ${{ inputs.sdk-version || 'checked-out-sdk/' }} - IMAGE_TAG_ARGS: ${{ inputs.sdk-repo-ref && format('--image-tag {0}-{1}', inputs.lang, inputs.docker-tag-ext) || ''}} + BRANCH_TAG_COMPONENT: ${{ inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name) }} + IMAGE_TAG_ARGS: ${{ inputs.sdk-repo-ref && format('--image-tag {0}-{1} --image-tag {2}', inputs.lang, inputs.docker-tag-ext, inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name)) || format('--image-tag {0}', inputs.lang && format('{0}-{1}', inputs.lang, steps.extract_branch.outputs.branch_name) || format('cli-{0}', steps.extract_branch.outputs.branch_name)) }} TAG_LATEST_ARGS: ${{ inputs.as-latest && '--tag-as-latest' || ''}} LANG_ARGS: ${{ inputs.lang && format('--language {0}', inputs.lang) || '' }} VERSION_ARGS: ${{ inputs.sdk-version && format('--version {0}', inputs.sdk-version) || '' }} diff --git a/cmd/cli/run_scenario.go b/cmd/cli/run_scenario.go index eee8d1b0..66e8b7e2 100644 --- a/cmd/cli/run_scenario.go +++ b/cmd/cli/run_scenario.go @@ -2,11 +2,15 @@ package cli import ( "context" + "crypto/rand" + "encoding/hex" + "errors" "fmt" "os" "strings" "time" + "github.com/antithesishq/antithesis-sdk-go/assert" "github.com/spf13/cobra" "github.com/spf13/pflag" "github.com/temporalio/omes/cmd/clioptions" @@ -56,7 +60,7 @@ type scenarioRunConfig struct { scenarioOptions []string timeout time.Duration doNotRegisterSearchAttributes bool - ignoreAlreadyStarted bool + verificationTimeout time.Duration } func (r *scenarioRunner) addCLIFlags(fs *pflag.FlagSet) { @@ -82,8 +86,8 @@ func (r *scenarioRunConfig) addCLIFlags(fs *pflag.FlagSet) { fs.BoolVar(&r.doNotRegisterSearchAttributes, "do-not-register-search-attributes", false, "Do not register the default search attributes used by scenarios. "+ "If the search attributes are not registed by the scenario they must be registered through some other method") - fs.BoolVar(&r.ignoreAlreadyStarted, "ignore-already-started", false, - "Ignore if a workflow with the same ID already exists. 
A Scenario may choose to override this behavior.") + fs.DurationVar(&r.verificationTimeout, "verification-timeout", 2*time.Minute, + "Maximum duration to wait for post-scenario verification (default 2m).") } func (r *scenarioRunner) preRun() { @@ -98,6 +102,8 @@ func (r *scenarioRunner) run(ctx context.Context) error { return fmt.Errorf("run ID not found") } else if r.iterations > 0 && r.duration > 0 { return fmt.Errorf("cannot provide both iterations and duration") + } else if r.verificationTimeout <= 0 { + return fmt.Errorf("verification-timeout must be greater than 0") } // Parse options @@ -137,6 +143,8 @@ func (r *scenarioRunner) run(ctx context.Context) error { } // Wait 300ms and try again time.Sleep(300 * time.Millisecond) + + r.logger.Error("Failed to dial, retrying ...", zap.Error(err)) } defer client.Close() @@ -145,9 +153,16 @@ func (r *scenarioRunner) run(ctx context.Context) error { return fmt.Errorf("failed to get root directory: %w", err) } + // Generate a random execution ID to ensure no two executions with the same RunID collide + executionID, err := generateExecutionID() + if err != nil { + return fmt.Errorf("failed to generate execution ID: %w", err) + } + scenarioInfo := loadgen.ScenarioInfo{ ScenarioName: r.scenario.Scenario, RunID: r.scenario.RunID, + ExecutionID: executionID, Logger: r.logger, MetricsHandler: metrics.NewHandler(), Client: client, @@ -159,16 +174,45 @@ func (r *scenarioRunner) run(ctx context.Context) error { MaxIterationAttempts: r.maxIterationAttempts, Timeout: r.timeout, DoNotRegisterSearchAttributes: r.doNotRegisterSearchAttributes, - IgnoreAlreadyStarted: r.ignoreAlreadyStarted, }, ScenarioOptions: scenarioOptions, Namespace: r.clientOptions.Namespace, RootPath: repoDir, } executor := scenario.ExecutorFn() - err = executor.Run(ctx, scenarioInfo) - if err != nil { - return fmt.Errorf("failed scenario: %w", err) + + // 1. Run the scenario + scenarioErr := executor.Run(ctx, scenarioInfo) + + // Collect all errors + var allErrors []error + if scenarioErr != nil { + allErrors = append(allErrors, fmt.Errorf("scenario execution failed: %w", scenarioErr)) + assert.Unreachable("scenario execution failed", map[string]any{"error": scenarioErr}) + } + + verifyCtx, verifyCancel := context.WithTimeout(ctx, r.verificationTimeout) + defer verifyCancel() + + // 2. Run verifications + if scenario.VerifyFn != nil { + verifyErrs := scenario.VerifyFn(verifyCtx, scenarioInfo, executor) + for _, err := range verifyErrs { + allErrors = append(allErrors, fmt.Errorf("post-scenario verification failed: %w", err)) + assert.Unreachable("post-scenario verification failed", map[string]any{"error": err}) + } + } + + // Aggregate all errors + return errors.Join(allErrors...) +} + +// generateExecutionID generates a random execution ID to uniquely identify this particular +// execution of a scenario. This ensures no two executions with the same RunID collide. 
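+// The ID is 8 random bytes, hex-encoded (16 characters); it is combined with the
+// RunID and iteration number in workflow IDs (e.g. "w-<runID>-<executionID>-<iteration>").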
+func generateExecutionID() (string, error) { + bytes := make([]byte, 8) // 8 bytes = 16 hex characters + if _, err := rand.Read(bytes); err != nil { + return "", err } - return nil + return hex.EncodeToString(bytes), nil } diff --git a/cmd/cli/run_scenario_with_worker.go b/cmd/cli/run_scenario_with_worker.go index 327e6485..9fd2f5bb 100644 --- a/cmd/cli/run_scenario_with_worker.go +++ b/cmd/cli/run_scenario_with_worker.go @@ -80,6 +80,7 @@ func (r *workerWithScenarioRunner) run(ctx context.Context) error { maxIterationsPerSecond: r.maxIterationsPerSecond, scenarioOptions: r.scenarioOptions, timeout: r.timeout, + verificationTimeout: r.verificationTimeout, doNotRegisterSearchAttributes: r.doNotRegisterSearchAttributes, }, clientOptions: r.ClientOptions, diff --git a/dockerfiles/cli.Dockerfile b/dockerfiles/cli.Dockerfile index f29bf435..0c825f26 100644 --- a/dockerfiles/cli.Dockerfile +++ b/dockerfiles/cli.Dockerfile @@ -1,61 +1,101 @@ # Build in a full featured container ARG TARGETARCH -FROM --platform=linux/$TARGETARCH golang:1.25 AS build +# Source stage: prepare source code and install Antithesis SDK +FROM --platform=linux/$TARGETARCH golang:1.25 AS source WORKDIR /app # Install protobuf compiler and git RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive \ + && DEBIAN_FRONTEND=noninteractive \ apt-get install --no-install-recommends --assume-yes \ - protobuf-compiler=3.21.12-11 libprotoc-dev=3.21.12-11 \ - && rm -rf /var/lib/apt/lists/* + protobuf-compiler=3.21.12-11 libprotoc-dev=3.21.12-11 \ + && rm -rf /var/lib/apt/lists/* -# Install Rust for kitchen-sink-gen -RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y \ - && . $HOME/.cargo/env \ - && echo "TARGETARCH: $TARGETARCH" \ - && ARCH=$(uname -m) \ - && echo "uname -m: $ARCH" \ - && if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ - rustup target add aarch64-unknown-linux-musl; \ - else \ - rustup target add x86_64-unknown-linux-musl; \ - fi -ENV PATH="$PATH:/root/.cargo/bin" - -# Copy CLI build dependencies +# Copy all source code COPY cmd ./cmd COPY loadgen ./loadgen COPY scenarios ./scenarios COPY workers ./workers/ COPY go.mod go.sum ./ +# Install Antithesis SDK and instrumentor +RUN go get github.com/antithesishq/antithesis-sdk-go@feature-assertion-wrappers && \ + go install github.com/antithesishq/antithesis-sdk-go/tools/antithesis-go-instrumentor@feature-assertion-wrappers + +# Instrumented stage: instrument the code with Antithesis +FROM --platform=linux/$TARGETARCH golang:1.25 AS instrumented + +# Copy source and instrumentor +COPY --from=source /app /app +COPY --from=source /go/bin/antithesis-go-instrumentor /go/bin/antithesis-go-instrumentor +COPY --from=source /go/pkg/mod /go/pkg/mod + +WORKDIR /app + +RUN mkdir /app_transformed && \ + antithesis-go-instrumentor /app /app_transformed + +# Build stage: compile the instrumented code +FROM --platform=linux/$TARGETARCH golang:1.25 AS build + +ARG TARGETARCH + +# Install protobuf compiler and git +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive \ + apt-get install --no-install-recommends --assume-yes \ + protobuf-compiler=3.21.12-11 libprotoc-dev=3.21.12-11 \ + && rm -rf /var/lib/apt/lists/* + +# Install Rust for kitchen-sink-gen +RUN wget -q -O - https://sh.rustup.rs | sh -s -- -y \ + && . 
$HOME/.cargo/env \ + && echo "TARGETARCH: $TARGETARCH" \ + && ARCH=$(uname -m) \ + && echo "uname -m: $ARCH" \ + && if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ + rustup target add aarch64-unknown-linux-musl; \ + else \ + rustup target add x86_64-unknown-linux-musl; \ + fi +ENV PATH="$PATH:/root/.cargo/bin" + +# Copy entire instrumented structure +COPY --from=instrumented /app_transformed /app_transformed + +# Set working directory to the customer code +WORKDIR /app_transformed/customer + # Build the CLI -RUN CGO_ENABLED=0 go build -o temporal-omes ./cmd +RUN CGO_ENABLED=0 go build -o temporal-omes -tags with_antithesis_sdk ./cmd # Install protoc-gen-go for kitchen-sink-gen build RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.31.0 # Build kitchen-sink-gen (statically linked) RUN cd loadgen/kitchen-sink-gen && \ - echo "TARGETARCH: $TARGETARCH" && \ - ARCH=$(uname -m) && \ - echo "uname -m: $ARCH" && \ - if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ + echo "TARGETARCH: $TARGETARCH" && \ + ARCH=$(uname -m) && \ + echo "uname -m: $ARCH" && \ + if [ "$TARGETARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then \ RUST_TARGET=aarch64-unknown-linux-musl; \ - else \ + else \ RUST_TARGET=x86_64-unknown-linux-musl; \ - fi && \ - echo "Building for rust target: $RUST_TARGET" && \ - RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target $RUST_TARGET + fi && \ + echo "Building for rust target: $RUST_TARGET" && \ + RUSTFLAGS='-C target-feature=+crt-static' cargo build --release --target $RUST_TARGET # Copy the CLI to a distroless "run" container FROM --platform=linux/$TARGETARCH gcr.io/distroless/static-debian11:nonroot -COPY --from=build /app/temporal-omes /app/temporal-omes -COPY --from=build /app/loadgen/kitchen-sink-gen/target/*/release/kitchen-sink-gen /app/kitchen-sink-gen +COPY --from=build /app_transformed/customer/temporal-omes /app/temporal-omes +COPY --from=build /app_transformed/customer/loadgen/kitchen-sink-gen/target/*/release/kitchen-sink-gen /app/kitchen-sink-gen + +# Copy instrumentation metadata +COPY --from=instrumented /app_transformed/notifier /notifier +COPY --from=instrumented /app_transformed/symbols /symbols # Default entrypoint for CLI usage -ENTRYPOINT ["/app/temporal-omes"] \ No newline at end of file +ENTRYPOINT ["/app/temporal-omes"] diff --git a/go.mod b/go.mod index 7ab7d254..5f842e26 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/temporalio/omes go 1.25.0 require ( + github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 github.com/gogo/protobuf v1.3.2 github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 diff --git a/go.sum b/go.sum index 7fbdd22a..becb2674 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6 h1:qSD74Vz3scN2SrfML8dy2Whcv0C3pNkfqYZXeL4SIq0= +github.com/antithesishq/antithesis-sdk-go v0.5.1-0.20250924165633-f60b0222f1b6/go.mod h1:IUpT2DPAKh6i/YhSbt6Gl3v2yvUZjmKncl7U91fup7E= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= diff --git a/loadgen/ebbandflow/ebb_and_flow.go 
b/loadgen/ebbandflow/ebb_and_flow.go deleted file mode 100644 index 8122a41f..00000000 --- a/loadgen/ebbandflow/ebb_and_flow.go +++ /dev/null @@ -1,19 +0,0 @@ -package ebbandflow - -import ( - "time" - - "github.com/temporalio/omes/loadgen" -) - -type WorkflowParams struct { - SleepActivities *loadgen.SleepActivityConfig `json:"sleepActivities"` -} - -type WorkflowOutput struct { - Timings []ActivityTiming `json:"timings"` -} - -type ActivityTiming struct { - ScheduleToStart time.Duration `json:"d"` -} diff --git a/loadgen/generic_executor.go b/loadgen/generic_executor.go index df28e3c8..54f72796 100644 --- a/loadgen/generic_executor.go +++ b/loadgen/generic_executor.go @@ -2,16 +2,28 @@ package loadgen import ( "context" + "errors" "fmt" + "sync" "time" + "github.com/antithesishq/antithesis-sdk-go/assert" + "go.temporal.io/api/serviceerror" "go.temporal.io/sdk/client" "go.uber.org/zap" ) +// skipIterationErr is a sentinel error indicating that the iteration +// should be skipped and not recorded as a completion or failure. +var skipIterationErr = errors.New("skip iteration") + type GenericExecutor struct { // Function to execute a single iteration of this scenario Execute func(context.Context, *Run) error + + // State management + mu sync.Mutex + state *ExecutorState } type genericRun struct { @@ -24,6 +36,17 @@ type genericRun struct { } func (g *GenericExecutor) Run(ctx context.Context, info ScenarioInfo) error { + g.mu.Lock() + if g.state == nil { + g.state = &ExecutorState{ + ExecutionID: info.ExecutionID, + } + } + if g.state.StartedAt.IsZero() { + g.state.StartedAt = time.Now() + } + g.mu.Unlock() + r, err := g.newRun(info) if err != nil { return err @@ -31,6 +54,47 @@ func (g *GenericExecutor) Run(ctx context.Context, info ScenarioInfo) error { return r.Run(ctx) } +func (g *GenericExecutor) RecordCompletion() { + g.mu.Lock() + defer g.mu.Unlock() + + g.state.CompletedIterations += 1 + g.state.LastCompletedAt = time.Now() +} + +func (g *GenericExecutor) RecordError(err error) { + g.mu.Lock() + defer g.mu.Unlock() +} + +// GetState returns a copy of the current state +func (g *GenericExecutor) GetState() ExecutorState { + g.mu.Lock() + defer g.mu.Unlock() + + if g.state == nil { + return ExecutorState{} + } + return *g.state +} + +func (g *GenericExecutor) Snapshot() any { + return g.GetState() +} + +func (g *GenericExecutor) LoadState(loader func(any) error) error { + var state ExecutorState + if err := loader(&state); err != nil { + return err + } + + g.mu.Lock() + g.state = &state + g.mu.Unlock() + + return nil +} + func (g *GenericExecutor) newRun(info ScenarioInfo) (*genericRun, error) { info.Configuration.ApplyDefaults() if err := info.Configuration.Validate(); err != nil { @@ -130,11 +194,21 @@ func (g *genericRun) Run(ctx context.Context) error { defer func() { g.executeTimer.Record(time.Since(iterStart)) + // Check if this is the special "skip iteration" error + isSkipIteration := errors.Is(err, skipIterationErr) + if isSkipIteration { + err = nil // Don't propagate this as an actual error + } + select { case <-ctx.Done(): case doneCh <- err: - if err == nil && g.config.OnCompletion != nil { - g.config.OnCompletion(ctx, run) + if err == nil && !isSkipIteration { + g.executor.RecordCompletion() + g.logger.Debugf("✅ Workflow completed: iteration %v", run.Iteration) + if g.config.OnCompletion != nil { + g.config.OnCompletion(ctx, run) + } } } }() @@ -142,13 +216,27 @@ func (g *genericRun) Run(ctx context.Context) error { retryLoop: for { 
err = g.executor.Execute(ctx, run) + + // Skip if workflow was already started. + if err != nil { + var alreadyStartedErr *serviceerror.WorkflowExecutionAlreadyStarted + if errors.As(err, &alreadyStartedErr) { + g.logger.Debugf("Workflow already started, skipping iteration %v", run.Iteration) + err = skipIterationErr + break + } + } + + // If defined, invoke user-defined error handler. if err != nil && g.config.HandleExecuteError != nil { err = g.config.HandleExecuteError(ctx, run, err) } + if err == nil { break } + // Attempt to retry. backoff, retry := run.ShouldRetry(err) if retry { err = fmt.Errorf("iteration %v encountered error: %w", run.Iteration, err) @@ -156,6 +244,14 @@ func (g *genericRun) Run(ctx context.Context) error { } else { err = fmt.Errorf("iteration %v failed: %w", run.Iteration, err) g.logger.Error(err) + assert.Unreachable( + "Workflow execution should never return an error after retries exhausted", + map[string]any{ + "iteration": run.Iteration, + "error": err.Error(), + "attempt_count": run.attemptCount, + }, + ) break retryLoop } diff --git a/loadgen/generic_executor_test.go b/loadgen/generic_executor_test.go index 4eaf8582..9884ac28 100644 --- a/loadgen/generic_executor_test.go +++ b/loadgen/generic_executor_test.go @@ -42,6 +42,7 @@ func execute(executor *GenericExecutor, runConfig RunConfiguration) error { info := ScenarioInfo{ MetricsHandler: client.MetricsNopHandler, Logger: logger.Sugar(), + ExecutionID: "test-exec-id", Configuration: runConfig, } return executor.Run(context.Background(), info) @@ -258,3 +259,4 @@ func TestExecutorRetriesLimit(t *testing.T) { require.Equal(t, []int{1, 1, 1, 1, 1}, totalTracker.seen, "expected 5 attempts") }) } + diff --git a/loadgen/helpers.go b/loadgen/helpers.go index da1d9e64..49058442 100644 --- a/loadgen/helpers.go +++ b/loadgen/helpers.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "strings" - "time" "go.temporal.io/api/enums/v1" "go.temporal.io/api/operatorservice/v1" @@ -44,63 +43,61 @@ func InitSearchAttribute( return nil } -func MinVisibilityCountEventually( +func MinVisibilityCount( ctx context.Context, info ScenarioInfo, request *workflowservice.CountWorkflowExecutionsRequest, minCount int, - waitAtMost time.Duration, ) error { - timeoutCtx, cancel := context.WithTimeout(ctx, waitAtMost) - defer cancel() - - countTicker := time.NewTicker(3 * time.Second) - defer countTicker.Stop() + visibilityCount, err := info.Client.CountWorkflow(ctx, request) + if err != nil { + return fmt.Errorf("failed to count workflows in visibility: %w", err) + } - printTicker := time.NewTicker(30 * time.Second) - defer printTicker.Stop() + if visibilityCount.Count < int64(minCount) { + return fmt.Errorf("expected at least %d workflows in visibility, got %d", + minCount, visibilityCount.Count) + } - var lastVisibilityCount int64 - done := false + return nil +} - check := func() error { - visibilityCount, err := info.Client.CountWorkflow(timeoutCtx, request) - if err != nil { - return fmt.Errorf("failed to count workflows in visibility: %w", err) - } - lastVisibilityCount = visibilityCount.Count - if lastVisibilityCount >= int64(minCount) { - done = true - } - return nil +// GetNonCompletedWorkflows queries and returns an error for each non-completed workflow. +// Returns a list of errors (one per non-completed workflow) with workflow details, or a query error if the list fails. 
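+// ContinuedAsNew executions are treated as completed: the query matches only
+// executions whose ExecutionStatus is neither 'Completed' nor 'ContinuedAsNew'.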
+func GetNonCompletedWorkflows(ctx context.Context, info ScenarioInfo, searchAttribute, runID string, limit int32) []error { + nonCompletedQuery := fmt.Sprintf( + "%s='%s' AND ExecutionStatus != 'Completed' AND ExecutionStatus != 'ContinuedAsNew'", + searchAttribute, + runID, + ) + + info.Logger.Infof("Visibility query for non-completed workflows - CLI command: temporal workflow list --namespace %s --query %q", + info.Namespace, nonCompletedQuery) + + resp, err := info.Client.ListWorkflow(ctx, &workflowservice.ListWorkflowExecutionsRequest{ + Namespace: info.Namespace, + Query: nonCompletedQuery, + PageSize: limit, + }) + + if err != nil { + return []error{fmt.Errorf("failed to list non-completed workflows: %w", err)} } - // Initial check before entering the loop. - if err := check(); err != nil { - return err + if len(resp.Executions) == 0 { + return nil } - // Loop until we reach the desired count or timeout. - for !done { - select { - case <-timeoutCtx.Done(): - return fmt.Errorf( - "expected at least %d workflows in visibility, got %d after waiting %v", - minCount, lastVisibilityCount, waitAtMost, - ) - - case <-printTicker.C: - info.Logger.Infof("current visibility count: %d (expected at least: %d)\n", - lastVisibilityCount, minCount) - - case <-countTicker.C: - if err := check(); err != nil { - return err - } - } + var workflowErrors []error + for _, exec := range resp.Executions { + workflowErrors = append(workflowErrors, fmt.Errorf( + "non-completed workflow: Namespace=%s, WorkflowID=%s, RunID=%s, Status=%s", + info.Namespace, + exec.Execution.WorkflowId, + exec.Execution.RunId, + exec.Status.String())) } - - return nil + return workflowErrors } // VerifyNoFailedWorkflows verifies that there are no failed or terminated workflows for the given search attribute. 
@@ -114,6 +111,8 @@ func VerifyNoFailedWorkflows(ctx context.Context, info ScenarioInfo, searchAttri statusQuery := fmt.Sprintf( "%s='%s' and ExecutionStatus = '%s'", searchAttribute, runID, status) + info.Logger.Infof("Visibility query for %s workflows - CLI command: temporal workflow count --namespace %s --query %q", + status.String(), info.Namespace, statusQuery) visibilityCount, err := info.Client.CountWorkflow(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: statusQuery, diff --git a/loadgen/kitchen_sink_executor.go b/loadgen/kitchen_sink_executor.go index 3bd4ce88..4ec73963 100644 --- a/loadgen/kitchen_sink_executor.go +++ b/loadgen/kitchen_sink_executor.go @@ -8,6 +8,8 @@ import ( ) type KitchenSinkExecutor struct { + GenericExecutor + TestInput *kitchensink.TestInput // Called once on start @@ -18,29 +20,28 @@ type KitchenSinkExecutor struct { UpdateWorkflowOptions func(context.Context, *Run, *KitchenSinkWorkflowOptions) error } -func (k KitchenSinkExecutor) Run(ctx context.Context, info ScenarioInfo) error { +func (k *KitchenSinkExecutor) Run(ctx context.Context, info ScenarioInfo) error { if k.PrepareTestInput != nil { if err := k.PrepareTestInput(ctx, info, k.TestInput); err != nil { return err } } - // Create generic executor and run it - ge := &GenericExecutor{ - Execute: func(ctx context.Context, run *Run) error { - options := run.DefaultKitchenSinkWorkflowOptions() - testInputClone, ok := proto.Clone(k.TestInput).(*kitchensink.TestInput) - if !ok { - panic("failed to clone test input") - } - options.Params = testInputClone - if k.UpdateWorkflowOptions != nil { - err := k.UpdateWorkflowOptions(ctx, run, &options) - if err != nil { - return err - } + + k.GenericExecutor.Execute = func(ctx context.Context, run *Run) error { + options := run.DefaultKitchenSinkWorkflowOptions() + testInputClone, ok := proto.Clone(k.TestInput).(*kitchensink.TestInput) + if !ok { + panic("failed to clone test input") + } + options.Params = testInputClone + if k.UpdateWorkflowOptions != nil { + err := k.UpdateWorkflowOptions(ctx, run, &options) + if err != nil { + return err } - return run.ExecuteKitchenSinkWorkflow(ctx, &options) - }, + } + return run.ExecuteKitchenSinkWorkflow(ctx, &options) } - return ge.Run(ctx, info) + + return k.GenericExecutor.Run(ctx, info) } diff --git a/loadgen/retry.go b/loadgen/retry.go new file mode 100644 index 00000000..a64d8211 --- /dev/null +++ b/loadgen/retry.go @@ -0,0 +1,39 @@ +package loadgen + +import ( + "context" + "time" +) + +// RetryUntilCtx repeatedly invokes fn until it reports completion or the context is done. +// - fn should return (true, nil) when the operation has succeeded and no further retries are needed. +// - If fn returns (true, err), the retry loop stops and err is returned. +// - If fn returns (false, err), the function will be retried after a backoff delay. +// Backoff starts at 1s and doubles each time up to a maximum of 10s. +// If the context is canceled or its deadline expires, the last non-nil error from fn is returned if present; +// otherwise, the context error is returned. 
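+//
+// Example (info and req are placeholders for a ScenarioInfo and a
+// CountWorkflowExecutionsRequest; adapt the body to the call being retried):
+//
+//	err := RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) {
+//		_, countErr := info.Client.CountWorkflow(ctx, req)
+//		return countErr == nil, countErr
+//	})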
+func RetryUntilCtx(ctx context.Context, fn func(context.Context) (bool, error)) error { + backoff := 1 * time.Second + for { + done, err := fn(ctx) + if done { + return err + } + + select { + case <-ctx.Done(): + if err != nil { + return err + } + return ctx.Err() + case <-time.After(backoff): + } + + if backoff < 10*time.Second { + backoff *= 2 + if backoff > 10*time.Second { + backoff = 10 * time.Second + } + } + } +} diff --git a/loadgen/scenario.go b/loadgen/scenario.go index 800bf550..43e8d866 100644 --- a/loadgen/scenario.go +++ b/loadgen/scenario.go @@ -2,6 +2,7 @@ package loadgen import ( "context" + "errors" "fmt" "maps" "path/filepath" @@ -13,8 +14,10 @@ import ( "go.temporal.io/api/enums/v1" "go.temporal.io/api/operatorservice/v1" + "go.temporal.io/api/serviceerror" "go.temporal.io/sdk/client" + "go.temporal.io/sdk/temporal" "go.uber.org/zap" "github.com/temporalio/omes/loadgen/kitchensink" @@ -23,6 +26,7 @@ import ( type Scenario struct { Description string ExecutorFn func() Executor + VerifyFn func(context.Context, ScenarioInfo, Executor) []error } // Executor for a scenario. @@ -31,6 +35,17 @@ type Executor interface { Run(context.Context, ScenarioInfo) error } +type ExecutorState struct { + // ExecutionID is the unique identifier for this particular execution of the scenario. + ExecutionID string `json:"executionID"` + // StartedAt is the timestamp when the executor run started. + StartedAt time.Time `json:"startedAt"` + // CompletedIterations tracks the number of successfully completed iterations. + CompletedIterations int `json:"completedIterations"` + // LastCompletedAt is the timestamp of the last completed workflow. + LastCompletedAt time.Time `json:"lastCompletedAt"` +} + // Optional interface that can be implemented by an [Executor] to allow it to be resumable. type Resumable interface { // LoadState loads a snapshot into the executor's internal state. @@ -54,6 +69,13 @@ type Configurable interface { Configure(ScenarioInfo) error } +// Verifier performs post-execution verifications and returns a list of errors. +type Verifier interface { + // VerifyRun performs post-execution verifications and returns a list of errors. + // The ExecutorState is provided by the caller. + VerifyRun(context.Context, ScenarioInfo, ExecutorState) []error +} + // ExecutorFunc is an [Executor] implementation for a function type ExecutorFunc func(context.Context, ScenarioInfo) error @@ -104,6 +126,9 @@ type ScenarioInfo struct { // and workflow ID prefix. This is a single value for the whole scenario, and // not a Workflow RunId. RunID string + // ExecutionID is a randomly generated ID that uniquely identifies this particular + // execution of the scenario. Combined with RunID, it ensures no two executions collide. + ExecutionID string // Metrics component for registering new metrics. MetricsHandler client.MetricsHandler // A zap logger. @@ -207,9 +232,6 @@ type RunConfiguration struct { // cannot use the SDK to register SAs, instead the SAs must be registered through the control plane. // Default is false. DoNotRegisterSearchAttributes bool - // IgnoreAlreadyStarted, if set, will not error when a workflow with the same ID already exists. - // Default is false. - IgnoreAlreadyStarted bool // OnCompletion, if set, is invoked after each successful iteration completes. OnCompletion func(context.Context, *Run) // HandleExecuteError, if set, is called when Execute returns an error, allowing transformation of errors. 
@@ -226,6 +248,18 @@ func (r *RunConfiguration) ApplyDefaults() { if r.MaxIterationAttempts == 0 { r.MaxIterationAttempts = DefaultMaxIterationAttempts } + if r.HandleExecuteError == nil { + r.HandleExecuteError = func(ctx context.Context, run *Run, err error) error { + if err != nil { + var alreadyStartedErr *serviceerror.WorkflowExecutionAlreadyStarted + if errors.As(err, &alreadyStartedErr) { + run.Logger.Debugf("Workflow already started, skipping iteration %v", run.Iteration) + return nil + } + } + return err + } + } } func (r RunConfiguration) Validate() error { @@ -268,35 +302,39 @@ func (s *ScenarioInfo) NewRun(iteration int) *Run { func (s *ScenarioInfo) RegisterDefaultSearchAttributes(ctx context.Context) error { if s.Client == nil { - // No client in some unit tests. Ideally this would be mocked but no mock operator service - // client is readily available. return nil } - // Ensure custom search attributes are registered that many scenarios rely on - _, err := s.Client.OperatorService().AddSearchAttributes(ctx, &operatorservice.AddSearchAttributesRequest{ - SearchAttributes: map[string]enums.IndexedValueType{ - "KS_Keyword": enums.INDEXED_VALUE_TYPE_KEYWORD, - "KS_Int": enums.INDEXED_VALUE_TYPE_INT, - }, - Namespace: s.Namespace, - }) - // Throw an error if the attributes could not be registered, but ignore already exists errs + + attrs := map[string]enums.IndexedValueType{ + "KS_Int": enums.INDEXED_VALUE_TYPE_INT, + "KS_Keyword": enums.INDEXED_VALUE_TYPE_KEYWORD, + OmesExecutionIDSearchAttribute: enums.INDEXED_VALUE_TYPE_KEYWORD, + } + alreadyExistsStrings := []string{ "already exists", "attributes mapping unavailble", } - if err != nil { - isAlreadyExistsErr := false - for _, s := range alreadyExistsStrings { - if strings.Contains(err.Error(), s) { - isAlreadyExistsErr = true - break - } + + var lastErr error + if err := RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _, lastErr = s.Client.OperatorService().AddSearchAttributes(ctx, &operatorservice.AddSearchAttributesRequest{ + SearchAttributes: attrs, + Namespace: s.Namespace, + }) + if lastErr == nil { + return true, nil } - if !isAlreadyExistsErr { - return fmt.Errorf("failed to register search attributes: %w", err) + for _, substr := range alreadyExistsStrings { + if strings.Contains(lastErr.Error(), substr) { + return true, nil + } } + return false, lastErr + }); err != nil { + return fmt.Errorf("failed to register search attributes: %w", err) } + return nil } @@ -312,9 +350,13 @@ func (r *Run) TaskQueue() string { // DefaultStartWorkflowOptions gets default start workflow info. func (r *Run) DefaultStartWorkflowOptions() client.StartWorkflowOptions { return client.StartWorkflowOptions{ - TaskQueue: TaskQueueForRun(r.RunID), - ID: fmt.Sprintf("w-%s-%d", r.RunID, r.Iteration), - WorkflowExecutionErrorWhenAlreadyStarted: !r.Configuration.IgnoreAlreadyStarted, + ID: fmt.Sprintf("w-%s-%s-%d", r.RunID, r.ExecutionID, r.Iteration), + TaskQueue: TaskQueueForRun(r.RunID), + // Always return error so that Executor can handle it and record starts accurately. 
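+		// The executor detects WorkflowExecutionAlreadyStarted errors and skips those
+		// iterations instead of counting them as completions or failures.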
+ WorkflowExecutionErrorWhenAlreadyStarted: true, + TypedSearchAttributes: temporal.NewSearchAttributes( + temporal.NewSearchAttributeKeyString(OmesExecutionIDSearchAttribute).ValueSet(r.ExecutionID), + ), } } @@ -380,10 +422,12 @@ func (r *Run) ExecuteKitchenSinkWorkflow(ctx context.Context, options *KitchenSi executeErr := executor.Handle.Get(cancelCtx, nil) if executeErr != nil { - return fmt.Errorf("failed to execute kitchen sink workflow: %w", executeErr) + return fmt.Errorf("failed to execute kitchen sink workflow (workflowID: %s, runID: %s): %w", + executor.Handle.GetID(), executor.Handle.GetRunID(), executeErr) } if clientActionsErr := clientActionsErrPtr.Load(); clientActionsErr != nil { - return fmt.Errorf("kitchen sink client actions failed: %w", *clientActionsErr) + return fmt.Errorf("kitchen sink client actions failed (workflowID: %s, runID: %s): %w", + executor.Handle.GetID(), executor.Handle.GetRunID(), *clientActionsErr) } return nil } diff --git a/loadgen/scenario_test.go b/loadgen/scenario_test.go index 6e809913..aa387db5 100644 --- a/loadgen/scenario_test.go +++ b/loadgen/scenario_test.go @@ -49,7 +49,7 @@ func TestScenarioConfigValidation(t *testing.T) { expectedErr: "iterations and duration are mutually exclusive", }, { - name: "both duration and start iteration (allowed)", + name: "both duration and start iteration", configuration: RunConfiguration{Duration: 3 * time.Second, StartFromIteration: 3}, expectedErr: "", }, diff --git a/loadgen/workflow_completion_verifier.go b/loadgen/workflow_completion_verifier.go new file mode 100644 index 00000000..49fc78b8 --- /dev/null +++ b/loadgen/workflow_completion_verifier.go @@ -0,0 +1,243 @@ +package loadgen + +import ( + "context" + "fmt" + "time" + + "go.temporal.io/api/workflowservice/v1" +) + +const OmesExecutionIDSearchAttribute = "OmesExecutionID" + +// WorkflowCompletionVerifier allows verifying the workflow completion count after a scenario completed. +type WorkflowCompletionVerifier struct { + // expectedWorkflowCount is an optional function to calculate the expected number of workflows + // from the ExecutorState. If nil, defaults to using state.CompletedIterations. + expectedWorkflowCount func(ExecutorState) int + + // info is the scenario information stored during initialization. + info ScenarioInfo +} + +// SetExpectedWorkflowCount sets a custom function to calculate the expected number of workflows. +// If not set, defaults to using state.CompletedIterations. +func (wct *WorkflowCompletionVerifier) SetExpectedWorkflowCount(fn func(ExecutorState) int) { + wct.expectedWorkflowCount = fn +} + +// NewWorkflowCompletionChecker creates a new checker with the given timeout. +// If timeout is zero, it uses a default of 30 seconds. +// Call this before the scenario is started to initialize and register search attributes. 
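+// The verification deadline itself comes from the context passed to Verify or
+// VerifyRun (the CLI bounds it with --verification-timeout).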
+func NewWorkflowCompletionChecker(ctx context.Context, info ScenarioInfo, timeout time.Duration) (*WorkflowCompletionVerifier, error) { + checker := &WorkflowCompletionVerifier{} + + if err := checker.init(ctx, info); err != nil { + return nil, err + } + + return checker, nil +} + +func (wct *WorkflowCompletionVerifier) init(ctx context.Context, info ScenarioInfo) error { + // Store the scenario info for later use + wct.info = info + + if info.Configuration.DoNotRegisterSearchAttributes { + return nil + } + + // Retry InitSearchAttribute until context deadline expires + retryTicker := time.NewTicker(2 * time.Second) + defer retryTicker.Stop() + + // Try immediately first + var lastErr error + if err := InitSearchAttribute(ctx, info, OmesExecutionIDSearchAttribute); err != nil { + lastErr = err + info.Logger.Warnf("failed to register search attribute %s, will retry: %v", + OmesExecutionIDSearchAttribute, err) + } else { + return nil + } + + // Retry loop until context deadline + for { + select { + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return last error. + return fmt.Errorf("failed to register search attribute %s after retries: %w", + OmesExecutionIDSearchAttribute, lastErr) + case <-retryTicker.C: + // Don't perform retry if context is already done + if ctx.Err() != nil { + return fmt.Errorf("failed to register search attribute %s after retries: %w", + OmesExecutionIDSearchAttribute, lastErr) + } + if err := InitSearchAttribute(ctx, info, OmesExecutionIDSearchAttribute); err != nil { + lastErr = err + info.Logger.Warnf("failed to register search attribute %s, will retry: %v", + OmesExecutionIDSearchAttribute, err) + } else { + info.Logger.Infof("successfully registered search attribute %s after retries", + OmesExecutionIDSearchAttribute) + return nil + } + } + } +} + +// VerifyRun implements the Verifier interface. +// It checks that the expected number of workflows have completed using the provided state. +func (wct *WorkflowCompletionVerifier) VerifyRun(ctx context.Context, info ScenarioInfo, state ExecutorState) []error { + return wct.Verify(ctx, state) +} + +// Verify checks that the expected number of workflows have completed. +// It retries all checks until the context deadline is reached. +func (wct *WorkflowCompletionVerifier) Verify(ctx context.Context, state ExecutorState) []error { + // Calculate expected workflow count + expectedCount := state.CompletedIterations + if wct.expectedWorkflowCount != nil { + expectedCount = wct.expectedWorkflowCount(state) + } + + // (1) Verify that we have completions at all. + if expectedCount == 0 { + return []error{fmt.Errorf("no workflows completed")} + } + + // Setup retry loop + checkTicker := time.NewTicker(15 * time.Second) + defer checkTicker.Stop() + + printTicker := time.NewTicker(30 * time.Second) + defer printTicker.Stop() + + query := fmt.Sprintf( + "%s='%s' AND ExecutionStatus = 'Completed'", + OmesExecutionIDSearchAttribute, + wct.info.ExecutionID, + ) + + wct.info.Logger.Infof("Visibility query for completed workflows - CLI command: temporal workflow count --namespace %s --query %q", + wct.info.Namespace, query) + + var lastErrors []error + + // Function to perform all checks + performChecks := func() []error { + var allErrors []error + + // (2) Verify that all completed workflows have indeed completed. 
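+		// Visibility must report at least expectedCount executions with
+		// ExecutionStatus = 'Completed' for this ExecutionID.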
+ err := MinVisibilityCount( + ctx, + wct.info, + &workflowservice.CountWorkflowExecutionsRequest{ + Namespace: wct.info.Namespace, + Query: query, + }, + expectedCount, + ) + if err != nil { + allErrors = append(allErrors, err) + } + + // (3) Verify that all started workflows have completed. + nonCompletedErrs := GetNonCompletedWorkflows( + ctx, + wct.info, + OmesExecutionIDSearchAttribute, + wct.info.ExecutionID, + 10, + ) + allErrors = append(allErrors, nonCompletedErrs...) + + return allErrors + } + + // Initial check + lastErrors = performChecks() + if len(lastErrors) == 0 { + return nil + } + + for { + select { + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return last errors. + return lastErrors + case <-printTicker.C: + wct.info.Logger.Infof("verification still has error(s), retrying until deadline: %v", lastErrors) + case <-checkTicker.C: + // Don't perform checks if context is already done + if ctx.Err() != nil { + return lastErrors + } + lastErrors = performChecks() + if len(lastErrors) == 0 { + return nil + } + } + } +} + +// TODO: remove this +// VerifyNoRunningWorkflows waits until there are no running workflows on the task queue for the given run ID. +// This is useful for scenarios that want to ensure all started workflows have completed. +// It retries the check until the context deadline is reached. +func (wct *WorkflowCompletionVerifier) VerifyNoRunningWorkflows(ctx context.Context) error { + query := fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", + TaskQueueForRun(wct.info.RunID)) + + wct.info.Logger.Infof("Visibility query for running workflows - CLI command: temporal workflow count --namespace %s --query %q", + wct.info.Namespace, query) + + // Setup retry loop + checkTicker := time.NewTicker(3 * time.Second) + defer checkTicker.Stop() + + printTicker := time.NewTicker(30 * time.Second) + defer printTicker.Stop() + + var lastError error + + // Function to perform check + performCheck := func() error { + return MinVisibilityCount( + ctx, + wct.info, + &workflowservice.CountWorkflowExecutionsRequest{ + Namespace: wct.info.Namespace, + Query: query, + }, + 0, + ) + } + + // Initial check + lastError = performCheck() + if lastError == nil { + return nil + } + + // Retry loop until context deadline + for { + select { + case <-ctx.Done(): + // Context ended (deadline or cancellation). Return last error. + return lastError + case <-printTicker.C: + wct.info.Logger.Infof("still waiting for running workflows to complete, retrying until deadline...") + case <-checkTicker.C: + // Don't perform check if context is already done + if ctx.Err() != nil { + return lastError + } + lastError = performCheck() + if lastError == nil { + return nil + } + } + } +} diff --git a/scenarios/ebb_and_flow.go b/scenarios/ebb_and_flow.go index 35067911..815db969 100644 --- a/scenarios/ebb_and_flow.go +++ b/scenarios/ebb_and_flow.go @@ -2,7 +2,6 @@ package scenarios import ( "context" - "errors" "fmt" "math" "math/rand" @@ -11,12 +10,8 @@ import ( "time" "github.com/temporalio/omes/loadgen" - "github.com/temporalio/omes/loadgen/ebbandflow" - "go.temporal.io/api/workflowservice/v1" -) - -const ( - EbbAndFlowScenarioIdSearchAttribute = "EbbAndFlowScenarioId" + . 
"github.com/temporalio/omes/loadgen/kitchensink" + "go.temporal.io/api/common/v1" ) const ( @@ -56,9 +51,7 @@ type ebbAndFlowConfig struct { } type ebbAndFlowState struct { - // TotalCompletedWorkflows tracks the total number of completed workflows across - // all restarts. It is used to verify workflow counts after the scenario completes. - TotalCompletedWorkflows int64 `json:"totalCompletedWorkflows"` + ExecutorState loadgen.ExecutorState `json:"executorState"` } type ebbAndFlowExecutor struct { @@ -72,6 +65,8 @@ type ebbAndFlowExecutor struct { completedActivities atomic.Int64 stateLock sync.Mutex state *ebbAndFlowState + completionVerifier *loadgen.WorkflowCompletionVerifier + executorState *loadgen.ExecutorState } var _ loadgen.Configurable = (*ebbAndFlowExecutor)(nil) @@ -85,6 +80,13 @@ func init() { " control-interval, max-consecutive-errors, backlog-log-interval.\n" + "Duration must be set.", ExecutorFn: func() loadgen.Executor { return newEbbAndFlowExecutor() }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*ebbAndFlowExecutor) + if e.completionVerifier == nil || e.executorState == nil { + return nil + } + return e.completionVerifier.VerifyRun(ctx, info, *e.executorState) + }, }) } @@ -143,6 +145,14 @@ func (e *ebbAndFlowExecutor) Configure(info loadgen.ScenarioInfo) error { return nil } +// VerifyRun implements the Verifier interface. +func (e *ebbAndFlowExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + if e.completionVerifier == nil || e.executorState == nil { + return nil + } + return e.completionVerifier.VerifyRun(ctx, info, *e.executorState) +} + // Run executes the ebb and flow scenario. 
func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { if err := e.Configure(info); err != nil { @@ -150,7 +160,7 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) } e.ScenarioInfo = info - e.id = fmt.Sprintf("ebb_and_flow_%s", e.RunID) + e.id = fmt.Sprintf("ebb_and_flow_%s", e.ExecutionID) e.rng = rand.New(rand.NewSource(time.Now().UnixNano())) e.startTime = time.Now() @@ -160,15 +170,25 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) return fmt.Errorf("configuration not parsed - Parse must be called before run") } - // Initialize search attribute for visibility tracking - err := loadgen.InitSearchAttribute( - ctx, - e.ScenarioInfo, - EbbAndFlowScenarioIdSearchAttribute, - ) + // Initialize executor state if needed + if e.executorState == nil { + e.executorState = &loadgen.ExecutorState{ + ExecutionID: info.ExecutionID, + } + } + + // Restore state if resuming + if e.isResuming && e.state != nil { + *e.executorState = e.state.ExecutorState + } + + // Initialize workflow completion checker with timeout from scenario options + timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) + checker, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) if err != nil { - return fmt.Errorf("failed to initialize search attribute %s: %w", EbbAndFlowScenarioIdSearchAttribute, err) + return fmt.Errorf("failed to initialize completion checker: %w", err) } + e.completionVerifier = checker var consecutiveErrCount int errCh := make(chan error, 10000) @@ -226,34 +246,9 @@ func (e *ebbAndFlowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) e.Logger.Info("Scenario complete; waiting for all workflows to finish...") startWG.Wait() - e.Logger.Info("Verifying scenario completion...") - - e.stateLock.Lock() - totalCompletedWorkflows := int(e.state.TotalCompletedWorkflows) - e.stateLock.Unlock() - - // Post-scenario: verify that at least one workflow was completed. - if totalCompletedWorkflows == 0 { - return errors.New("No iterations completed. Either the scenario never ran, or it failed to resume correctly.") - } - - // Post-scenario: verify reported workflow completion count from Visibility. - if err := loadgen.MinVisibilityCountEventually( - ctx, - e.ScenarioInfo, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: e.Namespace, - Query: fmt.Sprintf("%s='%s'", - EbbAndFlowScenarioIdSearchAttribute, e.id), - }, - totalCompletedWorkflows, - config.VisibilityVerificationTimeout, - ); err != nil { - return err - } + e.Logger.Info("Scenario execution complete") - // Post-scenario: ensure there are no failed or terminated workflows for this run. - return loadgen.VerifyNoFailedWorkflows(ctx, e.ScenarioInfo, EbbAndFlowScenarioIdSearchAttribute, e.ScenarioInfo.RunID) + return nil } // Snapshot returns a snapshot of the current state. @@ -261,7 +256,9 @@ func (e *ebbAndFlowExecutor) Snapshot() any { e.stateLock.Lock() defer e.stateLock.Unlock() - return *e.state + return ebbAndFlowState{ + ExecutorState: *e.executorState, + } } // LoadState loads the state from the provided loader function. @@ -292,44 +289,67 @@ func (e *ebbAndFlowExecutor) spawnWorkflowWithActivities( Groups: template.Groups, } - // Start workflow. 
+ // Sample activities from the configuration + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + activityActions := config.Sample(rng) + + // Build actions for the kitchensink workflow + var actions []*Action + for _, activityAction := range activityActions { + actions = append(actions, &Action{ + Variant: &Action_ExecActivity{ + ExecActivity: activityAction, + }, + }) + } + + // Start workflow using kitchensink. run := e.NewRun(int(iteration)) options := run.DefaultStartWorkflowOptions() options.ID = fmt.Sprintf("%s-track-%d", e.id, iteration) options.WorkflowExecutionErrorWhenAlreadyStarted = false - options.SearchAttributes = map[string]interface{}{ - EbbAndFlowScenarioIdSearchAttribute: e.id, - } - workflowInput := &ebbandflow.WorkflowParams{ - SleepActivities: &config, + workflowInput := &WorkflowInput{ + InitialActions: []*ActionSet{ + { + Actions: actions, + Concurrent: true, + }, + { + Actions: []*Action{ + { + Variant: &Action_ReturnResult{ + ReturnResult: &ReturnResultAction{ + ReturnThis: &common.Payload{}, + }, + }, + }, + }, + }, + }, } - // Start workflow to track activity timings. - wf, err := e.Client.ExecuteWorkflow(ctx, options, "ebbAndFlowTrack", workflowInput) + // Start workflow using kitchensink. + wf, err := e.Client.ExecuteWorkflow(ctx, options, "kitchenSink", workflowInput) if err != nil { - return fmt.Errorf("failed to start ebbAndFlowTrack workflow for iteration %d: %w", iteration, err) + return fmt.Errorf("failed to start kitchensink workflow for iteration %d: %w", iteration, err) } e.scheduledActivities.Add(activities) // Wait for workflow completion - var result ebbandflow.WorkflowOutput - err = wf.Get(ctx, &result) + err = wf.Get(ctx, nil) if err != nil { - e.Logger.Errorf("ebbAndFlowTrack workflow failed for iteration %d: %v", iteration, err) + e.Logger.Errorf("kitchensink workflow failed for iteration %d: %v", iteration, err) } e.completedActivities.Add(activities) - e.incrementTotalCompletedWorkflow() - - return nil -} -func (e *ebbAndFlowExecutor) incrementTotalCompletedWorkflow() { + // Record completion in executor state for verification e.stateLock.Lock() - if e.state != nil { - e.state.TotalCompletedWorkflows++ - } + e.executorState.CompletedIterations++ + e.executorState.LastCompletedAt = time.Now() e.stateLock.Unlock() + + return nil } func calculateBacklogTarget( diff --git a/scenarios/ebb_and_flow_test.go b/scenarios/ebb_and_flow_test.go index 443925af..2c6aad6f 100644 --- a/scenarios/ebb_and_flow_test.go +++ b/scenarios/ebb_and_flow_test.go @@ -72,7 +72,7 @@ func TestEbbAndFlow(t *testing.T) { require.NoError(t, err, "Executor should complete successfully") state = executor.Snapshot().(ebbAndFlowState) - require.GreaterOrEqual(t, state.TotalCompletedWorkflows, int64(1)) + require.GreaterOrEqual(t, state.ExecutorState.CompletedIterations, 1) }) t.Run("Run executor again, resuming from previous state", func(t *testing.T) { @@ -93,7 +93,7 @@ func TestEbbAndFlow(t *testing.T) { require.NoError(t, err, "Executor should complete successfully") state = executor.Snapshot().(ebbAndFlowState) - require.Greater(t, state.TotalCompletedWorkflows, previouState.TotalCompletedWorkflows) + require.Greater(t, state.ExecutorState.CompletedIterations, previouState.ExecutorState.CompletedIterations) }) t.Run("Run executor again, resuming from previous state but without any time left", func(t *testing.T) { diff --git a/scenarios/fixed_resource_consumption.go b/scenarios/fixed_resource_consumption.go index 94dbb5a6..c9136056 100644 --- 
a/scenarios/fixed_resource_consumption.go +++ b/scenarios/fixed_resource_consumption.go @@ -1,6 +1,7 @@ package scenarios import ( + "context" "math" "math/rand" "time" @@ -14,6 +15,23 @@ import ( // This scenario is meant to be adjusted and run manually to evaluate the performance of different // slot provider implementations +type fixedResourceExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *fixedResourceExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func parallelResourcesActions( numConccurrent int, bytesToAlloc int, @@ -63,7 +81,8 @@ func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Used for testing slot provider performance. Runs activities that consume certain amounts of resources.", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ + return &fixedResourceExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ @@ -123,7 +142,16 @@ func init() { }, }, }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*fixedResourceExecutor) + if e.completionVerifier == nil { + return nil } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) }, }) } diff --git a/scenarios/fuzzer.go b/scenarios/fuzzer.go index af62408a..4f7314b4 100644 --- a/scenarios/fuzzer.go +++ b/scenarios/fuzzer.go @@ -2,43 +2,74 @@ package scenarios import ( "context" + "time" "github.com/temporalio/omes/loadgen" ) +type fuzzerExecutor struct { + fuzzExecutor loadgen.FuzzExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *fuzzerExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the fuzz executor + return e.fuzzExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "This scenario uses the kitchen sink input generation tool to run fuzzy" + " workflows", ExecutorFn: func() loadgen.Executor { - return loadgen.FuzzExecutor{ - InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { - fPath, ok := info.ScenarioOptions["input-file"] - if ok && fPath != "" { - return loadgen.FileOrArgs{ - FilePath: fPath, + return &fuzzerExecutor{ + fuzzExecutor: loadgen.FuzzExecutor{ + InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { + fPath, ok := info.ScenarioOptions["input-file"] + if ok && fPath != "" { + return loadgen.FileOrArgs{ + FilePath: fPath, + } } - } - args := []string{"generate"} - seed, ok := info.ScenarioOptions["seed"] - if ok && seed != "" { - args = append(args, "--explicit-seed", seed) - } - config, ok := info.ScenarioOptions["config"] - if ok && config != "" { - args = append(args, "--generator-config-override", config) - } - _, ok = info.ScenarioOptions["no-output-file"] - if !ok { - args = 
append(args, "--output-path", "last_fuzz_run.proto") - } - return loadgen.FileOrArgs{ - Args: args, - } + args := []string{"generate"} + seed, ok := info.ScenarioOptions["seed"] + if ok && seed != "" { + args = append(args, "--explicit-seed", seed) + } + config, ok := info.ScenarioOptions["config"] + if ok && config != "" { + args = append(args, "--generator-config-override", config) + } + _, ok = info.ScenarioOptions["no-output-file"] + if !ok { + args = append(args, "--output-path", "last_fuzz_run.proto") + } + return loadgen.FileOrArgs{ + Args: args, + } + }, + DefaultConfiguration: loadgen.RunConfiguration{}, }, - DefaultConfiguration: loadgen.RunConfiguration{}, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*fuzzerExecutor) + if e.completionVerifier == nil { + return nil + } + // Get state from the embedded generic executor (FuzzExecutor creates one internally) + state := loadgen.ExecutorState{ + CompletedIterations: info.Configuration.Iterations, + } + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/scenarios/fuzzer_example.go b/scenarios/fuzzer_example.go index 26fb2dc3..0d6715c7 100644 --- a/scenarios/fuzzer_example.go +++ b/scenarios/fuzzer_example.go @@ -2,23 +2,54 @@ package scenarios import ( "context" + "time" "github.com/temporalio/omes/loadgen" ) +type fuzzerExampleExecutor struct { + fuzzExecutor loadgen.FuzzExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *fuzzerExampleExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the fuzz executor + return e.fuzzExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "This scenario runs the kitchen sink input generation tool `example` " + "command to help with basic verification of KS implementations.", ExecutorFn: func() loadgen.Executor { - return loadgen.FuzzExecutor{ - InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { - return loadgen.FileOrArgs{ - Args: []string{"example"}, - } + return &fuzzerExampleExecutor{ + fuzzExecutor: loadgen.FuzzExecutor{ + InitInputs: func(ctx context.Context, info loadgen.ScenarioInfo) loadgen.FileOrArgs { + return loadgen.FileOrArgs{ + Args: []string{"example"}, + } + }, + DefaultConfiguration: loadgen.RunConfiguration{}, }, - DefaultConfiguration: loadgen.RunConfiguration{}, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*fuzzerExampleExecutor) + if e.completionVerifier == nil { + return nil + } + // Get state from the embedded generic executor (FuzzExecutor creates one internally) + state := loadgen.ExecutorState{ + CompletedIterations: info.Configuration.Iterations, + } + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/scenarios/scheduler_stress.go b/scenarios/scheduler_stress.go index d040d509..9fc28220 100644 --- a/scenarios/scheduler_stress.go +++ b/scenarios/scheduler_stress.go @@ -43,6 +43,11 @@ func init() { }, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + // Scheduler stress scenario manages its own lifecycle and cleanup + // No additional verification needed beyond what 
happens during execution + return nil + }, }) } diff --git a/scenarios/state_transitions_steady.go b/scenarios/state_transitions_steady.go index 3cc054a0..ac0573fe 100644 --- a/scenarios/state_transitions_steady.go +++ b/scenarios/state_transitions_steady.go @@ -8,9 +8,18 @@ import ( "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" - "go.temporal.io/api/workflowservice/v1" ) +type steadyStateConfig struct { + MaxConsecutiveErrors int +} + +type stateTransitionsSteadyExecutor struct { + loadgen.ScenarioInfo + config *steadyStateConfig + completionVerifier *loadgen.WorkflowCompletionVerifier +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Run a certain number of state transitions per second. This requires duration option to be set " + @@ -18,16 +27,42 @@ func init() { "example, can be run with: run-scenario-with-worker --scenario state_transitions_steady --language go " + "--embedded-server --duration 5m --option state-transitions-per-second=3", ExecutorFn: func() loadgen.Executor { - return loadgen.ExecutorFunc(func(ctx context.Context, runOptions loadgen.ScenarioInfo) error { - return (&stateTransitionsSteady{runOptions}).run(ctx) - }) + return &stateTransitionsSteadyExecutor{} + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*stateTransitionsSteadyExecutor) + if e.completionVerifier == nil { + return nil + } + // For state transitions steady, we just need to verify no running workflows + return []error{e.completionVerifier.VerifyNoRunningWorkflows(ctx)} }, }) } -type stateTransitionsSteady struct{ loadgen.ScenarioInfo } +var _ loadgen.Configurable = (*stateTransitionsSteadyExecutor)(nil) + +// Configure initializes the steadyStateConfig by reading scenario options +func (s *stateTransitionsSteadyExecutor) Configure(info loadgen.ScenarioInfo) error { + s.ScenarioInfo = info + s.config = &steadyStateConfig{ + MaxConsecutiveErrors: s.ScenarioOptionInt(MaxConsecutiveErrorsFlag, 5), + } + if s.config.MaxConsecutiveErrors < 1 { + return fmt.Errorf("%s must be at least 1, got %d", MaxConsecutiveErrorsFlag, s.config.MaxConsecutiveErrors) + } + return nil +} + +// Run executes the state transitions steady scenario +func (s *stateTransitionsSteadyExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + if err := s.Configure(info); err != nil { + return fmt.Errorf("failed to parse scenario configuration: %w", err) + } + return s.run(ctx) +} -func (s *stateTransitionsSteady) run(ctx context.Context) error { +func (s *stateTransitionsSteadyExecutor) run(ctx context.Context) error { // The goal here is to meet a certain number of state transitions per second. // For us this means a certain number of workflows per second. So we must // first execute a basic workflow (i.e. 
with a simple activity) and get the @@ -50,6 +85,12 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { durationPerStateTransition, ) + completionChecker, err := loadgen.NewWorkflowCompletionChecker(ctx, s.ScenarioInfo, time.Minute) + if err != nil { + return fmt.Errorf("failed to create workflow completion checker: %w", err) + } + s.completionVerifier = completionChecker + // Execute initial workflow and get the transition count workflowParams := &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{ @@ -84,7 +125,6 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { // Start a workflow every X interval until duration reached or there are N // start failures in a row - const maxConsecutiveErrors = 5 errCh := make(chan error, 10000) ticker := time.NewTicker(workflowStartInterval) defer ticker.Stop() @@ -100,8 +140,8 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { consecutiveErrCount = 0 } else { consecutiveErrCount++ - if consecutiveErrCount >= maxConsecutiveErrors { - return fmt.Errorf("got %v consecutive errors, most recent: %w", maxConsecutiveErrors, err) + if consecutiveErrCount >= s.config.MaxConsecutiveErrors { + return fmt.Errorf("got %v consecutive errors, most recent: %w", s.config.MaxConsecutiveErrors, err) } } case <-ticker.C: @@ -130,15 +170,6 @@ func (s *stateTransitionsSteady) run(ctx context.Context) error { s.Logger.Infof("Run complete, ran %v iterations, waiting on all workflows to complete", iter) // First, wait for all starts to have started (they are done in goroutine) startWG.Wait() - return loadgen.MinVisibilityCountEventually( - ctx, - s.ScenarioInfo, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: s.Namespace, - Query: fmt.Sprintf("TaskQueue = %q and ExecutionStatus = 'Running'", - loadgen.TaskQueueForRun(s.RunID)), - }, - 0, - time.Minute, - ) + + return completionChecker.VerifyNoRunningWorkflows(ctx) } diff --git a/scenarios/stuck_workflow.go b/scenarios/stuck_workflow.go new file mode 100644 index 00000000..0fa665bd --- /dev/null +++ b/scenarios/stuck_workflow.go @@ -0,0 +1,123 @@ +package scenarios + +import ( + "context" + "time" + + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/loadgen/kitchensink" + "go.temporal.io/api/common/v1" + "go.temporal.io/sdk/converter" +) + +// stuckWorkflowExecutor wraps KitchenSinkExecutor and implements Verifier interface +// to detect stuck workflows using WorkflowCompletionVerifier. 
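+// The verifier is created inside Run, where the populated ScenarioInfo is available,
+// before delegating to the embedded KitchenSinkExecutor; VerifyRun and the scenario's
+// VerifyFn then reuse that same verifier once the run has finished.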
+type stuckWorkflowExecutor struct { + *loadgen.KitchenSinkExecutor + verifier *loadgen.WorkflowCompletionVerifier +} + +var _ loadgen.Verifier = (*stuckWorkflowExecutor)(nil) +var _ loadgen.Resumable = (*stuckWorkflowExecutor)(nil) + +func (e *stuckWorkflowExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create the verifier before running + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.verifier = verifier + + // Run the embedded executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + +func (e *stuckWorkflowExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + if e.verifier == nil { + return nil + } + return e.verifier.VerifyRun(ctx, info, state) +} + +func (e *stuckWorkflowExecutor) Snapshot() any { + return e.KitchenSinkExecutor.Snapshot() +} + +func (e *stuckWorkflowExecutor) LoadState(loader func(any) error) error { + return e.KitchenSinkExecutor.LoadState(loader) +} + +func init() { + loadgen.MustRegisterScenario(loadgen.Scenario{ + Description: "Test scenario where the first iteration blocks forever (stuck workflow), " + + "even iterations use Continue-As-New, and odd iterations complete normally. " + + "Used for testing workflow completion detection.", + ExecutorFn: func() loadgen.Executor { + return &stuckWorkflowExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{}, + }, + UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { + // Only the first iteration should block forever. + if run.Iteration == 1 { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_AwaitWorkflowState{ + AwaitWorkflowState: &kitchensink.AwaitWorkflowState{ + Key: "will-never-be-set", + Value: "never", + }, + }, + }, + }, + }, + } + } else if run.Iteration%2 == 0 { + // Have some Continue-As-New. + // ContinueAsNew needs to pass the workflow input as the first argument. + // We pass a simple completion action to make the continued workflow complete immediately. 
+ workflowInput, err := converter.GetDefaultDataConverter().ToPayload( + &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, + }) + if err != nil { + return err + } + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + { + Variant: &kitchensink.Action_ContinueAsNew{ + ContinueAsNew: &kitchensink.ContinueAsNewAction{ + Arguments: []*common.Payload{workflowInput}, + }, + }, + }, + }, + }, + } + } else { + options.Params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + } + } + return nil + }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*stuckWorkflowExecutor) + if e.verifier == nil || e.KitchenSinkExecutor == nil { + return nil + } + state := e.KitchenSinkExecutor.Snapshot().(loadgen.ExecutorState) + return e.verifier.VerifyRun(ctx, info, state) + }, + }) +} diff --git a/scenarios/stuck_workflow_test.go b/scenarios/stuck_workflow_test.go new file mode 100644 index 00000000..7f43cc30 --- /dev/null +++ b/scenarios/stuck_workflow_test.go @@ -0,0 +1,105 @@ +package scenarios + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" +) + +// TestStuckWorkflowScenario verifies that the stuck_workflow scenario correctly detects +// stuck workflows through its VerifyFn implementation. +func TestStuckWorkflowScenario(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-test-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 10, + }, + } + + // Get the stuck_workflow scenario + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + require.NotNil(t, scenario.VerifyFn, "stuck_workflow scenario should have a VerifyFn") + + executor := scenario.ExecutorFn() + + // RunExecutorTest will automatically run verification since the scenario has a VerifyFn + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to stuck workflow detected by verification") + require.Contains(t, err.Error(), "deadline exceeded", "should report deadline exceeded for stuck iteration") +} + +// TestStuckWorkflowVerifyFnDetectsStuckWorkflow tests that the VerifyFn properly identifies +// stuck workflows after execution. 
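+// Unlike TestStuckWorkflowScenario above, which asserts on the iteration timeout
+// ("deadline exceeded"), this test asserts on the verifier's "non-completed workflow" error.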
+func TestStuckWorkflowVerifyFnDetectsStuckWorkflow(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-verify-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 5, + }, + } + + // Get the stuck_workflow scenario + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor and expect it to fail + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to verification detecting stuck workflow") + + // The error should indicate a non-completed workflow was detected + require.Contains(t, err.Error(), "non-completed workflow", "verification should report stuck workflow") +} + +// TestStuckWorkflowScenarioIterationBehavior tests that the stuck_workflow scenario +// behaves correctly across multiple iterations: +// - Iteration 1: blocks forever (stuck workflow) +// - Even iterations: use Continue-As-New +// - Odd iterations (except 1): complete normally +func TestStuckWorkflowScenarioIterationBehavior(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-behavior-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 7, // Test iterations 1-7 + }, + } + + // Get the stuck_workflow scenario + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + + executor := scenario.ExecutorFn() + + // RunExecutorTest will fail because iteration 1 will be stuck + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to stuck workflow on iteration 1") + + // Verify the executor state shows 6 completed iterations (all except iteration 1) + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 6, execState.CompletedIterations, + "should complete 6 iterations (iterations 2-7, skipping stuck iteration 1)") +} diff --git a/scenarios/throughput_stress.go b/scenarios/throughput_stress.go index f93829f1..55e855e4 100644 --- a/scenarios/throughput_stress.go +++ b/scenarios/throughput_stress.go @@ -3,19 +3,17 @@ package scenarios import ( "cmp" "context" - "errors" "fmt" "hash/fnv" "math/rand" - "strings" "sync" "time" "github.com/temporalio/omes/loadgen" . "github.com/temporalio/omes/loadgen/kitchensink" "go.temporal.io/api/common/v1" + "go.temporal.io/api/enums/v1" "go.temporal.io/api/workflowservice/v1" - "go.temporal.io/sdk/temporal" "google.golang.org/protobuf/types/known/emptypb" ) @@ -39,20 +37,13 @@ const ( // MinThroughputPerHourFlag is the minimum workflow throughput required (workflows/hour). // Default is 0, meaning disabled. The scenario calculates actual throughput and compares. MinThroughputPerHourFlag = "min-throughput-per-hour" -) - -const ( - ThroughputStressScenarioIdSearchAttribute = "ThroughputStressScenarioId" + // DisableLocalActivitiesFlag converts all local activities to remote activities when set to true. 
+ // Default is false, meaning local activities will be used as designed. + DisableLocalActivitiesFlag = "disable-local-activities" ) type tpsState struct { - // CompletedIterations is the number of iteration that have been completed. - CompletedIterations int `json:"completedIterations"` - // LastCompletedIterationAt is the time when the last iteration was completed. Helpful for debugging. - LastCompletedIterationAt time.Time `json:"lastCompletedIterationAt"` - // AccumulatedDuration is the total execution time across all runs (original + resumes). - // This excludes any downtime between runs. Used for accurate throughput calculation. - AccumulatedDuration time.Duration `json:"accumulatedDuration"` + ExecutorState any `json:"executorState"` } type tpsConfig struct { @@ -67,15 +58,18 @@ type tpsConfig struct { MinThroughputPerHour float64 ScenarioRunID string RngSeed int64 + DisableLocalActivities bool } type tpsExecutor struct { - lock sync.Mutex - state *tpsState - config *tpsConfig - isResuming bool - runID string - rng *rand.Rand + executor *loadgen.KitchenSinkExecutor + tpsVerifier *tpsVerifier + lock sync.Mutex + state *tpsState + config *tpsConfig + isResuming bool + runID string + rng *rand.Rand } var _ loadgen.Resumable = (*tpsExecutor)(nil) @@ -87,6 +81,14 @@ func init() { "Throughput stress scenario. Use --option with '%s', '%s' to control internal parameters", IterFlag, ContinueAsNewAfterIterFlag), ExecutorFn: func() loadgen.Executor { return newThroughputStressExecutor() }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + t := executor.(*tpsExecutor) + if t.tpsVerifier == nil || t.executor == nil { + return nil + } + state := t.executor.GetState() + return t.VerifyRun(ctx, info, state) + }, }) } @@ -99,7 +101,13 @@ func (t *tpsExecutor) Snapshot() any { t.lock.Lock() defer t.lock.Unlock() - return *t.state + if t.executor == nil { + return *t.state + } + + return tpsState{ + ExecutorState: t.executor.Snapshot(), + } } // LoadState loads the state from the provided byte slice. @@ -118,6 +126,14 @@ func (t *tpsExecutor) LoadState(loader func(any) error) error { return nil } +// VerifyRun implements the Verifier interface. +func (t *tpsExecutor) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + if t.tpsVerifier == nil || t.executor == nil { + return nil + } + return t.tpsVerifier.VerifyRun(ctx, info, state) +} + // Configure initializes tpsConfig. Largely, it reads and validates throughput_stress scenario options func (t *tpsExecutor) Configure(info loadgen.ScenarioInfo) error { config := &tpsConfig{ @@ -165,8 +181,11 @@ func (t *tpsExecutor) Configure(info loadgen.ScenarioInfo) error { return fmt.Errorf("%s must be positive, got %v", VisibilityVerificationTimeoutFlag, config.VisibilityVerificationTimeout) } + config.DisableLocalActivities = info.ScenarioOptionBool(DisableLocalActivitiesFlag, false) + t.config = config t.rng = rand.New(rand.NewSource(config.RngSeed)) + return nil } @@ -183,35 +202,33 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error } t.runID = info.RunID - // Track start time of current run - currentRunStartTime := time.Now() - - // Add search attribute, if it doesn't exist yet, to query for workflows by run ID. - // Running this on resume, too, in case a previous Omes run crashed before it could add the search attribute. 
- if err := loadgen.InitSearchAttribute(ctx, info, ThroughputStressScenarioIdSearchAttribute); err != nil { - return err - } - t.lock.Lock() isResuming := t.isResuming currentState := *t.state t.lock.Unlock() + // Initialize workflow completion checker + timeout := info.ScenarioOptionDuration(VisibilityVerificationTimeoutFlag, 30*time.Second) + completionVerifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, timeout) + if err != nil { + return fmt.Errorf("failed to initialize workflow completion checker: %w", err) + } + t.tpsVerifier = &tpsVerifier{ + completionVerifier: completionVerifier, + config: t.config, + } + if isResuming { info.Logger.Info(fmt.Sprintf("Resuming scenario from state: %#v", currentState)) - info.Configuration.StartFromIteration = int(currentState.CompletedIterations) + 1 + if execState, ok := currentState.ExecutorState.(loadgen.ExecutorState); ok { + info.Configuration.StartFromIteration = execState.CompletedIterations + } } else { if err := t.verifyFirstRun(ctx, info, t.config.SkipCleanNamespaceCheck); err != nil { return err } } - // Listen to iteration completion events to update the state. - info.Configuration.OnCompletion = func(ctx context.Context, run *loadgen.Run) { - t.updateStateOnIterationCompletion() - info.Logger.Debugf("Completed iteration %d", run.Iteration) - } - // Start the scenario run. // // NOTE: When resuming, it can happen that there are no more iterations/time left to run more iterations. @@ -219,24 +236,13 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error if isResuming && info.Configuration.Duration <= 0 && info.Configuration.Iterations == 0 { info.Logger.Info("Skipping executor run: out of time") } else { - ksExec := &loadgen.KitchenSinkExecutor{ + t.executor = &loadgen.KitchenSinkExecutor{ TestInput: &TestInput{ WorkflowInput: &WorkflowInput{ InitialActions: []*ActionSet{}, }, }, UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { - options.StartOptions = run.DefaultStartWorkflowOptions() - if isResuming { - // Enforce to never fail on "workflow already started" when resuming. - options.StartOptions.WorkflowExecutionErrorWhenAlreadyStarted = false - } - - // Add search attribute to the workflow options so that it can be used in visibility queries. - options.StartOptions.TypedSearchAttributes = temporal.NewSearchAttributes( - temporal.NewSearchAttributeKeyString(ThroughputStressScenarioIdSearchAttribute).ValueSet(info.RunID), - ) - // Start some workflows via Update-with-Start. if t.maybeWithStart(0.5) { options.Params.WithStartAction = &WithStartClientAction{ @@ -262,19 +268,56 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error return nil }, } - if err := ksExec.Run(ctx, info); err != nil { + + // Restore state if resuming + if isResuming { + if execState, ok := t.state.ExecutorState.(loadgen.ExecutorState); ok { + t.executor.LoadState(func(v any) error { + s := v.(*loadgen.ExecutorState) + *s = execState + return nil + }) + } + } + + // Configure expected workflow count function based on scenario config + expectedWorkflowCount := func(state loadgen.ExecutorState) int { + completedIterations := state.CompletedIterations + + // Calculate continue-as-new workflows + // var continueAsNewWorkflows int + // if t.config.ContinueAsNewAfterIter > 0 { + // // Subtract 1 because the last iteration doesn't trigger a continue-as-new. 
+ // continueAsNewPerIter := (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter + // continueAsNewWorkflows = continueAsNewPerIter * completedIterations + // } + + // Calculate child workflows + completedChildWorkflows := completedIterations * t.config.InternalIterations + + // Total: parent + children + continue-as-new + return completedIterations + completedChildWorkflows // TODO continueAsNewWorkflows + } + completionVerifier.SetExpectedWorkflowCount(expectedWorkflowCount) + + if err := t.executor.Run(ctx, info); err != nil { return err } } t.lock.Lock() - completedIterations := t.state.CompletedIterations - t.state.AccumulatedDuration += time.Since(currentRunStartTime) - totalDuration := t.state.AccumulatedDuration + var completedIterations int + if t.executor != nil { + completedIterations = t.executor.GetState().CompletedIterations + } else { + // Executor was skipped, use state from previous run + if execState, ok := t.state.ExecutorState.(loadgen.ExecutorState); ok { + completedIterations = execState.CompletedIterations + } + } t.lock.Unlock() - completedChildWorkflows := completedIterations * t.config.InternalIterations - + // Calculate completion metrics for logging. var continueAsNewPerIter int var continueAsNewWorkflows int if t.config.ContinueAsNewAfterIter > 0 { @@ -282,58 +325,18 @@ func (t *tpsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error continueAsNewPerIter = (t.config.InternalIterations - 1) / t.config.ContinueAsNewAfterIter continueAsNewWorkflows = continueAsNewPerIter * completedIterations } - + completedChildWorkflows := completedIterations * t.config.InternalIterations completedWorkflows := completedIterations + completedChildWorkflows + continueAsNewWorkflows - var sb strings.Builder - sb.WriteString("[Scenario completion summary] ") - sb.WriteString(fmt.Sprintf("Run ID: %s, ", info.RunID)) - sb.WriteString(fmt.Sprintf("Total iterations completed: %d, ", completedIterations)) - sb.WriteString(fmt.Sprintf("Total child workflows: %d (%d per iteration), ", completedChildWorkflows, t.config.InternalIterations)) - sb.WriteString(fmt.Sprintf("Total continue-as-new workflows: %d (%d per iteration), ", continueAsNewWorkflows, continueAsNewPerIter)) - sb.WriteString(fmt.Sprintf("Total workflows completed: %d", completedWorkflows)) - info.Logger.Info(sb.String()) - - // Post-scenario: verify that at least one iteration was completed. - if completedIterations == 0 { - return errors.New("No iterations completed. Either the scenario never ran, or it failed to resume correctly.") - } - - // Post-scenario: verify reported workflow completion count from Visibility. - if err := loadgen.MinVisibilityCountEventually( - ctx, - info, - &workflowservice.CountWorkflowExecutionsRequest{ - Namespace: info.Namespace, - Query: fmt.Sprintf("%s='%s'", - ThroughputStressScenarioIdSearchAttribute, info.RunID), - }, - completedWorkflows, - t.config.VisibilityVerificationTimeout, - ); err != nil { - return err - } - - // Post-scenario: check throughput threshold - if t.config.MinThroughputPerHour > 0 { - actualThroughputPerHour := float64(completedWorkflows) / totalDuration.Hours() - - if actualThroughputPerHour < t.config.MinThroughputPerHour { - // Calculate how many workflows we expected given the duration - expectedWorkflows := int(totalDuration.Hours() * t.config.MinThroughputPerHour) + // Log completion summary. 
+ info.Logger.Info(fmt.Sprintf( + "[Scenario completion summary] Run ID: %s, Total iterations completed: %d, "+ + "Total child workflows: %d (%d per iteration), Total continue-as-new workflows: %d (%d per iteration), "+ + "Total workflows completed: %d", + info.RunID, completedIterations, completedChildWorkflows, t.config.InternalIterations, + continueAsNewWorkflows, continueAsNewPerIter, completedWorkflows)) - return fmt.Errorf("insufficient throughput: %.1f workflows/hour < %.1f required "+ - "(completed %d workflows, expected %d in %v)", - actualThroughputPerHour, - t.config.MinThroughputPerHour, - completedWorkflows, - expectedWorkflows, - totalDuration.Round(time.Second)) - } - } - - // Post-scenario: ensure there are no failed or terminated workflows for this run. - return loadgen.VerifyNoFailedWorkflows(ctx, info, ThroughputStressScenarioIdSearchAttribute, info.RunID) + return nil } func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioInfo, skipCleanNamespaceCheck bool) error { @@ -343,7 +346,7 @@ func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioI } // Complain if there are already existing workflows with the provided run id; unless resuming. - workflowCountQry := fmt.Sprintf("%s='%s'", ThroughputStressScenarioIdSearchAttribute, info.RunID) + workflowCountQry := fmt.Sprintf("%s='%s'", loadgen.OmesExecutionIDSearchAttribute, info.ExecutionID) visibilityCount, err := info.Client.CountWorkflow(ctx, &workflowservice.CountWorkflowExecutionsRequest{ Namespace: info.Namespace, Query: workflowCountQry, @@ -359,13 +362,6 @@ func (t *tpsExecutor) verifyFirstRun(ctx context.Context, info loadgen.ScenarioI return nil } -func (t *tpsExecutor) updateStateOnIterationCompletion() { - t.lock.Lock() - defer t.lock.Unlock() - t.state.CompletedIterations += 1 - t.state.LastCompletedIterationAt = time.Now() -} - func (t *tpsExecutor) createActions(run *loadgen.Run) []*ActionSet { return []*ActionSet{ { @@ -375,6 +371,15 @@ func (t *tpsExecutor) createActions(run *loadgen.Run) []*ActionSet { } } +// activityLocality returns the appropriate activity locality function based on the config. +// If DisableLocalActivities is true, all activities will be remote; otherwise, return the local activity function. 
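+// Like other scenario options, the flag is supplied on the command line,
+// e.g. --option disable-local-activities=true.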
+func (t *tpsExecutor) activityLocality() func(*ExecuteActivityAction) *Action { + if t.config.DisableLocalActivities { + return DefaultRemoteActivity + } + return DefaultLocalActivity +} + func (t *tpsExecutor) createActionsChunk( run *loadgen.Run, childCount int, @@ -395,9 +400,9 @@ func (t *tpsExecutor) createActionsChunk( // Create actions for the current chunk for i := 0; i < itersPerChunk; i++ { syncActions := []*Action{ - PayloadActivity(256, 256, DefaultLocalActivity), - PayloadActivity(0, 256, DefaultLocalActivity), - PayloadActivity(0, 256, DefaultLocalActivity), + PayloadActivity(256, 256, t.activityLocality()), + PayloadActivity(0, 256, t.activityLocality()), + PayloadActivity(0, 256, t.activityLocality()), // TODO: use local activity: server error log "failed to set query completion state to succeeded ClientActivity(ClientActions(t.createSelfQuery()), DefaultRemoteActivity), } @@ -407,11 +412,11 @@ func (t *tpsExecutor) createActionsChunk( t.createChildWorkflowAction(run, childCount), PayloadActivity(256, 256, DefaultRemoteActivity), PayloadActivity(256, 256, DefaultRemoteActivity), - PayloadActivity(0, 256, DefaultLocalActivity), - PayloadActivity(0, 256, DefaultLocalActivity), - GenericActivity("noop", DefaultLocalActivity), + PayloadActivity(0, 256, t.activityLocality()), + PayloadActivity(0, 256, t.activityLocality()), + GenericActivity("noop", t.activityLocality()), ClientActivity(ClientActions(t.createSelfQuery()), DefaultRemoteActivity), - ClientActivity(ClientActions(t.createSelfSignal()), DefaultLocalActivity), + ClientActivity(ClientActions(t.createSelfSignal()), t.activityLocality()), ClientActivity(ClientActions(t.createSelfUpdateWithTimer()), DefaultRemoteActivity), ClientActivity(ClientActions(t.createSelfUpdateWithPayload()), DefaultRemoteActivity), // TODO: use local activity: there is an 8s gap in the event history @@ -504,11 +509,12 @@ func (t *tpsExecutor) createChildWorkflowAction(run *loadgen.Run, childID int) * }, }), }, - WorkflowId: fmt.Sprintf("%s/child-%d", run.DefaultStartWorkflowOptions().ID, childID), + WorkflowId: fmt.Sprintf("%s/child-%d", run.DefaultStartWorkflowOptions().ID, childID), + WorkflowIdReusePolicy: enums.WorkflowIdReusePolicy(enums.WORKFLOW_ID_CONFLICT_POLICY_USE_EXISTING), SearchAttributes: map[string]*common.Payload{ - ThroughputStressScenarioIdSearchAttribute: &common.Payload{ + loadgen.OmesExecutionIDSearchAttribute: &common.Payload{ Metadata: map[string][]byte{"encoding": []byte("json/plain"), "type": []byte("Keyword")}, - Data: []byte(fmt.Sprintf("%q", t.config.ScenarioRunID)), // quoted to be valid JSON string + Data: []byte(fmt.Sprintf("%q", run.ExecutionID)), // quoted to be valid JSON string }, }, }, @@ -592,7 +598,7 @@ func (t *tpsExecutor) createSelfUpdateWithPayloadAsLocal() *ClientAction { DoActions: &DoActionsUpdate{ Variant: &DoActionsUpdate_DoActions{ DoActions: SingleActionSet( - PayloadActivity(0, 256, DefaultLocalActivity), + PayloadActivity(0, 256, t.activityLocality()), ), }, }, @@ -651,3 +657,53 @@ func (t *tpsExecutor) maybeWithStart(likelihood float64) bool { defer t.lock.Unlock() return t.rng.Float64() <= likelihood } + +type tpsVerifier struct { + completionVerifier *loadgen.WorkflowCompletionVerifier + config *tpsConfig +} + +func (v *tpsVerifier) VerifyRun(ctx context.Context, info loadgen.ScenarioInfo, state loadgen.ExecutorState) []error { + var errors []error + + // 1. Delegate to completion verifier + errors = append(errors, v.completionVerifier.VerifyRun(ctx, info, state)...) + + // 2. 
Check throughput, if configured. + if v.config.MinThroughputPerHour > 0 { + // Recalculate expected workflow count for throughput check + var continueAsNewWorkflows int + if v.config.ContinueAsNewAfterIter > 0 { + continueAsNewPerIter := (v.config.InternalIterations - 1) / v.config.ContinueAsNewAfterIter + continueAsNewWorkflows = continueAsNewPerIter * state.CompletedIterations + } + completedChildWorkflows := state.CompletedIterations * v.config.InternalIterations + completedWorkflows := state.CompletedIterations + completedChildWorkflows + continueAsNewWorkflows + + // Calculate duration from executor state + var totalDuration time.Duration + if !state.StartedAt.IsZero() && !state.LastCompletedAt.IsZero() { + totalDuration = state.LastCompletedAt.Sub(state.StartedAt) + } + + if totalDuration == 0 { + errors = append(errors, fmt.Errorf("throughput check: no duration recorded (startedAt=%v, lastCompletedAt=%v)", + state.StartedAt, state.LastCompletedAt)) + } else { + actualThroughput := float64(completedWorkflows) / totalDuration.Hours() + + if actualThroughput < v.config.MinThroughputPerHour { + expectedWorkflows := int(totalDuration.Hours() * v.config.MinThroughputPerHour) + errors = append(errors, fmt.Errorf("throughput check: %.1f workflows/hour < %.1f required "+ + "(completed %d workflows, expected %d in %v)", + actualThroughput, + v.config.MinThroughputPerHour, + completedWorkflows, + expectedWorkflows, + totalDuration.Round(time.Second))) + } + } + } + + return errors +} diff --git a/scenarios/throughput_stress_test.go b/scenarios/throughput_stress_test.go index 47613010..eb3b64cc 100644 --- a/scenarios/throughput_stress_test.go +++ b/scenarios/throughput_stress_test.go @@ -42,7 +42,8 @@ func TestThroughputStress(t *testing.T) { require.NoError(t, err, "Executor should complete successfully") state := executor.Snapshot().(tpsState) - require.Equal(t, state.CompletedIterations, 2) + execState := state.ExecutorState.(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations) }) t.Run("Run executor again, resuming from middle", func(t *testing.T) { @@ -50,13 +51,19 @@ func TestThroughputStress(t *testing.T) { err := executor.LoadState(func(v any) error { s := v.(*tpsState) - s.CompletedIterations = 0 // execution will start from iteration 1 + s.ExecutorState = loadgen.ExecutorState{ + CompletedIterations: 1, // execution will start from iteration 1 + } return nil }) require.NoError(t, err) _, err = env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) require.NoError(t, err, "Executor should complete successfully when resuming from middle") + + state := executor.Snapshot().(tpsState) + execState := state.ExecutorState.(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations) }) t.Run("Run executor again, resuming from end", func(t *testing.T) { @@ -64,12 +71,18 @@ func TestThroughputStress(t *testing.T) { err := executor.LoadState(func(v any) error { s := v.(*tpsState) - s.CompletedIterations = s.CompletedIterations + s.ExecutorState = loadgen.ExecutorState{ + CompletedIterations: 2, + } return nil }) require.NoError(t, err) _, err = env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) require.NoError(t, err, "Executor should complete successfully when resuming from end") + + state := executor.Snapshot().(tpsState) + execState := state.ExecutorState.(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations) }) } diff --git a/scenarios/versioning_pinned_workflows.go 
b/scenarios/versioning_pinned_workflows.go new file mode 100644 index 00000000..a96650b3 --- /dev/null +++ b/scenarios/versioning_pinned_workflows.go @@ -0,0 +1,668 @@ +package scenarios + +// versioning_pinned_workflows implements a scenario for testing worker versioning with pinned workflows. +// +// This scenario uses the Worker Deployment APIs for worker versioning (non-deprecated). +// See: https://docs.temporal.io/develop/go/versioning +// +// Implementation approach: +// - Manages Go SDK workers directly within the scenario (not via OMES worker infrastructure) +// - Uses DeploymentOptions to configure workers with deployment names and build IDs +// - Starts multiple workers concurrently with different build IDs to support version bumping +// - Old workers remain running to handle pinned workflows while new workers handle new traffic +// +// The scenario: +// 1. Starts N workflows pinned to an initial version (default: 1) +// 2. Signals all workflows on each iteration +// 3. Bumps the version every N iterations by starting new workers and setting them as current +// 4. Verifies that workflow build IDs always move forward, never backward + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/loadgen/kitchensink" + commonpb "go.temporal.io/api/common/v1" + "go.temporal.io/api/enums/v1" + historypb "go.temporal.io/api/history/v1" + "go.temporal.io/api/serviceerror" + "go.temporal.io/api/workflowservice/v1" + "go.temporal.io/sdk/activity" + "go.temporal.io/sdk/client" + "go.temporal.io/sdk/worker" + "go.temporal.io/sdk/workflow" +) + +// retryUntilCtx retries the given function until it reports done or the context is done. +// Backoff starts at 1s and is capped at 10s. 
+// Using loadgen.RetryUntilCtx (removed local helper) + +const ( + // NumWorkflowsFlag controls how many workflows to start on iteration 0 + NumWorkflowsFlag = "num-workflows" + // VersionBumpIntervalFlag controls how many iterations between version bumps + VersionBumpIntervalFlag = "version-bump-interval" + // InitialVersionFlag is the initial version number to pin workflows to (default: 1) + InitialVersionFlag = "initial-version" +) + +type versioningPinnedState struct { + WorkflowIDs []string `json:"workflowIds"` + CurrentVersion string `json:"currentVersion"` + VersionSequence []string `json:"versionSequence"` +} + +type versioningPinnedConfig struct { + NumWorkflows int + VersionBumpInterval int + InitialVersion string +} + +type versioningPinnedExecutor struct { + lock sync.Mutex + state *versioningPinnedState + config *versioningPinnedConfig + workers []worker.Worker // All active workers (one per version) + deploymentName string +} + +var _ loadgen.Configurable = (*versioningPinnedExecutor)(nil) + +// noopActivity is a simple activity for testing +func noopActivity(_ context.Context) error { + return nil +} + +// simpleKitchenSinkWorkflow is a simplified kitchensink workflow for this scenario +// It executes a single activity and then waits indefinitely (until cancelled/terminated) +func simpleKitchenSinkWorkflow(ctx workflow.Context, params *kitchensink.WorkflowInput) (*commonpb.Payload, error) { + // Execute a simple activity to generate history with build ID + ao := workflow.ActivityOptions{ + StartToCloseTimeout: 10 * time.Second, + } + activityCtx := workflow.WithActivityOptions(ctx, ao) + if err := workflow.ExecuteActivity(activityCtx, "noop").Get(activityCtx, nil); err != nil { + return nil, err + } + + // Wait for signals indefinitely + signalChan := workflow.GetSignalChannel(ctx, "do_signal") + selector := workflow.NewSelector(ctx) + + // Keep workflow alive by continuously waiting for signals + for { + selector.AddReceive(signalChan, func(c workflow.ReceiveChannel, more bool) { + // Receive signal with the correct type (kitchensink.DoSignal) + var signal kitchensink.DoSignal + c.Receive(ctx, &signal) + + // Execute another activity when signaled + ao := workflow.ActivityOptions{ + StartToCloseTimeout: 10 * time.Second, + } + activityCtx := workflow.WithActivityOptions(ctx, ao) + _ = workflow.ExecuteActivity(activityCtx, "noop").Get(activityCtx, nil) + }) + + selector.Select(ctx) + + // Create a new selector for the next iteration + selector = workflow.NewSelector(ctx) + } +} + +func init() { + loadgen.MustRegisterScenario(loadgen.Scenario{ + Description: fmt.Sprintf( + "Worker versioning scenario with pinned workflows. Starts n workflows pinned to version 1, "+ + "signals them each iteration, and bumps versions every n iterations. "+ + "Use --option with '%s' (default: 10), '%s' (default: 5), '%s' (default: 1)", + NumWorkflowsFlag, VersionBumpIntervalFlag, InitialVersionFlag), + ExecutorFn: func() loadgen.Executor { return newVersioningPinnedExecutor() }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*versioningPinnedExecutor) + return e.Verify(ctx, info) + }, + }) +} + +func newVersioningPinnedExecutor() *versioningPinnedExecutor { + return &versioningPinnedExecutor{ + state: &versioningPinnedState{ + WorkflowIDs: []string{}, + CurrentVersion: "", + VersionSequence: []string{}, + }, + } +} + +// Configure initializes the executor configuration from scenario options. 
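+// Defaults: num-workflows=10, version-bump-interval=5, initial-version=1; all three must be positive.
+// Illustrative invocation (scenario name assumed to follow the file name, mirroring other scenarios):
+//
+//	run-scenario-with-worker --scenario versioning_pinned_workflows --language go \
+//	    --option num-workflows=10 --option version-bump-interval=5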
+func (e *versioningPinnedExecutor) Configure(info loadgen.ScenarioInfo) error { + initialVersionNum := info.ScenarioOptionInt(InitialVersionFlag, 1) + + config := &versioningPinnedConfig{ + NumWorkflows: info.ScenarioOptionInt(NumWorkflowsFlag, 10), + VersionBumpInterval: info.ScenarioOptionInt(VersionBumpIntervalFlag, 5), + InitialVersion: fmt.Sprintf("%d", initialVersionNum), + } + + if config.NumWorkflows <= 0 { + return fmt.Errorf("%s must be positive, got %d", NumWorkflowsFlag, config.NumWorkflows) + } + + if config.VersionBumpInterval <= 0 { + return fmt.Errorf("%s must be positive, got %d", VersionBumpIntervalFlag, config.VersionBumpInterval) + } + + if initialVersionNum <= 0 { + return fmt.Errorf("%s must be positive, got %d", InitialVersionFlag, initialVersionNum) + } + + e.config = config + return nil +} + +// startWorker creates and starts a new worker with the specified build ID and deployment options. +func (e *versioningPinnedExecutor) startWorker(ctx context.Context, info loadgen.ScenarioInfo, buildID string) (worker.Worker, error) { + taskQueue := info.RunID + ".local" + + // Create worker with deployment options + w := worker.New(info.Client, taskQueue, worker.Options{ + BuildID: buildID, + UseBuildIDForVersioning: true, + DeploymentOptions: worker.DeploymentOptions{ + UseVersioning: true, + Version: worker.WorkerDeploymentVersion{ + DeploymentName: e.deploymentName, + BuildID: buildID, + }, + // Use Pinned behavior by default - workflows stay on the version they started with + DefaultVersioningBehavior: workflow.VersioningBehaviorPinned, + }, + }) + + // Register workflow and activities + w.RegisterWorkflowWithOptions(simpleKitchenSinkWorkflow, workflow.RegisterOptions{Name: "kitchenSink"}) + w.RegisterActivityWithOptions(noopActivity, activity.RegisterOptions{Name: "noop"}) + + // Start the worker with retry until context done + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + if err := w.Start(); err != nil { + return false, err + } + return true, nil + }); err != nil { + return nil, fmt.Errorf("failed to start worker with build ID %s: %w", buildID, err) + } + + info.Logger.Infof("Started worker with build ID %s on task queue %s", buildID, taskQueue) + return w, nil +} + +// stopAllWorkers stops all running workers. +func (e *versioningPinnedExecutor) stopAllWorkers() { + e.lock.Lock() + workers := e.workers + e.workers = nil + e.lock.Unlock() + + for _, w := range workers { + if w != nil { + w.Stop() + } + } +} + +// Run executes the versioning scenario. 
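+// Iteration 0 starts the initial worker and the pinned workflows; every
+// version-bump-interval iterations a new worker is started and promoted to the
+// current deployment version; every later iteration also signals all tracked
+// workflows. Once the loop ends, the tracked workflows are terminated.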
+func (e *versioningPinnedExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + if err := e.Configure(info); err != nil { + return fmt.Errorf("failed to configure scenario: %w", err) + } + + e.lock.Lock() + e.state.CurrentVersion = e.config.InitialVersion + e.state.VersionSequence = []string{e.config.InitialVersion} + e.deploymentName = fmt.Sprintf("omes-deployment-%s", info.RunID) + e.lock.Unlock() + + // Ensure all workers are stopped when we exit + defer e.stopAllWorkers() + + // Calculate total iterations + totalIterations := info.Configuration.Iterations + if totalIterations == 0 && info.Configuration.Duration > 0 { + // Estimate iterations based on duration (assuming ~1 iteration per second) + totalIterations = int(info.Configuration.Duration.Seconds()) + } + + for iteration := 0; iteration < totalIterations; iteration++ { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + if iteration == 0 { + // Iteration 0: Start worker with initial version and start workflows + w, err := e.startWorker(ctx, info, e.state.CurrentVersion) + if err != nil { + return fmt.Errorf("failed to start initial worker: %w", err) + } + e.lock.Lock() + e.workers = append(e.workers, w) + e.lock.Unlock() + + // Wait for worker to be ready + time.Sleep(1 * time.Second) + + // Start n kitchensink workflows + if err := e.startWorkflows(ctx, info, iteration); err != nil { + return fmt.Errorf("failed to start workflows on iteration 0: %w", err) + } + info.Logger.Infof("Started %d workflows pinned to version %s", e.config.NumWorkflows, e.state.CurrentVersion) + } else { + // Check if we need to bump the version + if iteration > 0 && iteration%e.config.VersionBumpInterval == 0 { + if err := e.bumpVersion(ctx, info); err != nil { + return fmt.Errorf("failed to bump version on iteration %d: %w", iteration, err) + } + } + + // Send signals to all workflows + if err := e.signalAllWorkflows(ctx, info, iteration); err != nil { + // Log signal failures but don't fail the scenario (as per requirements) + info.Logger.Warnf("Some signals failed on iteration %d: %v", iteration, err) + } + } + + // Add a small delay between iterations to avoid overwhelming the system + time.Sleep(100 * time.Millisecond) + } + + // After all iterations, terminate workflows to complete the scenario + info.Logger.Info("Terminating workflows after scenario completion") + e.lock.Lock() + workflowIDs := make([]string, len(e.state.WorkflowIDs)) + copy(workflowIDs, e.state.WorkflowIDs) + e.lock.Unlock() + + for _, workflowID := range workflowIDs { + err := info.Client.TerminateWorkflow(ctx, workflowID, "", "scenario completed") + if err != nil { + info.Logger.Warnf("Failed to terminate workflow %s: %v", workflowID, err) + } + } + + return nil +} + +// startWorkflows starts n kitchensink workflows pinned to the current version. 
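+// Workflow starts are issued concurrently and retried until ctx is done; a
+// WorkflowExecutionAlreadyStarted error is treated as success so retried starts stay idempotent.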
+func (e *versioningPinnedExecutor) startWorkflows(ctx context.Context, info loadgen.ScenarioInfo, iteration int) error { + e.lock.Lock() + currentVersion := e.state.CurrentVersion + deploymentName := e.deploymentName + e.lock.Unlock() + + taskQueue := info.RunID + ".local" + + // Set the current version as the deployment's current version + // The worker has already registered the deployment, now we set it as current + if err := e.setupVersioning(ctx, info.Client, info.Namespace, deploymentName, currentVersion); err != nil { + return fmt.Errorf("failed to setup versioning: %w", err) + } + + var wg sync.WaitGroup + errChan := make(chan error, e.config.NumWorkflows) + + for i := 0; i < e.config.NumWorkflows; i++ { + wg.Add(1) + go func(workflowNum int) { + defer wg.Done() + + workflowID := fmt.Sprintf("%s-versioned-%d", info.RunID, workflowNum) + + // Create a long-running workflow that waits for signals + testInput := &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + { + Actions: []*kitchensink.Action{ + // Set a workflow state to track initialization + kitchensink.NewSetWorkflowStateAction(fmt.Sprintf("workflow-%d-started", workflowNum), "true"), + // Execute a noop activity to generate some history with build ID + kitchensink.GenericActivity("noop", kitchensink.DefaultRemoteActivity), + // Wait for completion signal (this state will be set by a final signal) + kitchensink.NewAwaitWorkflowStateAction(fmt.Sprintf("workflow-%d-complete", workflowNum), "true"), + }, + Concurrent: false, + }, + }, + }, + } + + options := client.StartWorkflowOptions{ + ID: workflowID, + TaskQueue: taskQueue, + WorkflowExecutionTimeout: 24 * time.Hour, + SearchAttributes: map[string]any{ + loadgen.OmesExecutionIDSearchAttribute: info.ExecutionID, + }, + } + + var startErr error + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _, startErr = info.Client.ExecuteWorkflow( + ctx, + options, + "kitchenSink", + testInput.WorkflowInput, + ) + if startErr == nil { + return true, nil + } + // Treat AlreadyStarted as success for idempotency + if _, ok := startErr.(*serviceerror.WorkflowExecutionAlreadyStarted); ok { + return true, nil + } + return false, startErr + }); err != nil { + errChan <- fmt.Errorf("failed to start workflow %s: %w", workflowID, startErr) + return + } + + e.lock.Lock() + e.state.WorkflowIDs = append(e.state.WorkflowIDs, workflowID) + e.lock.Unlock() + }(i) + } + + wg.Wait() + close(errChan) + + // Collect any errors + var errs []error + for err := range errChan { + errs = append(errs, err) + } + + if len(errs) > 0 { + return fmt.Errorf("failed to start %d workflows: %v", len(errs), errs[0]) + } + + return nil +} + +// setupVersioning configures the worker versioning for the deployment using Worker Deployment APIs. 
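+// The SetWorkerDeploymentCurrentVersion call is retried until it succeeds or ctx is done.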
+func (e *versioningPinnedExecutor) setupVersioning(ctx context.Context, c client.Client, namespace, deploymentName, buildID string) error { + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _, err := c.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ + Namespace: namespace, + DeploymentName: deploymentName, + BuildId: buildID, + }) + return err == nil, err + }); err != nil { + return fmt.Errorf("failed to set version %s as current for deployment %s: %w", buildID, deploymentName, err) + } + return nil +} + +// bumpVersion increases the version, starts a new worker with the new build ID, and sets it as current. +func (e *versioningPinnedExecutor) bumpVersion(ctx context.Context, info loadgen.ScenarioInfo) error { + e.lock.Lock() + // Parse current version (e.g., "1" -> 1) and increment + var versionNum int + _, err := fmt.Sscanf(e.state.CurrentVersion, "%d", &versionNum) + if err != nil { + e.lock.Unlock() + return fmt.Errorf("failed to parse version %s: %w", e.state.CurrentVersion, err) + } + + versionNum++ + newVersion := fmt.Sprintf("%d", versionNum) + e.lock.Unlock() + + // Start a new worker with the new build ID + // This keeps the old worker running to handle pinned workflows + w, err := e.startWorker(ctx, info, newVersion) + if err != nil { + return fmt.Errorf("failed to start worker for version %s: %w", newVersion, err) + } + + e.lock.Lock() + e.workers = append(e.workers, w) + deploymentName := e.deploymentName + e.lock.Unlock() + + // Wait for worker to be ready + time.Sleep(1 * time.Second) + + // Retry indefinitely until ctx is done when setting the new current version + // Set the new version as the current deployment version (retry until ctx done) + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + _, err = info.Client.WorkflowService().SetWorkerDeploymentCurrentVersion(ctx, &workflowservice.SetWorkerDeploymentCurrentVersionRequest{ + Namespace: info.Namespace, + DeploymentName: deploymentName, + BuildId: newVersion, + }) + return err == nil, err + }); err != nil { + return fmt.Errorf("failed to set version %s as current: %w", newVersion, err) + } + + e.lock.Lock() + info.Logger.Infof("Bumped version from %s to %s", e.state.CurrentVersion, newVersion) + e.state.CurrentVersion = newVersion + e.state.VersionSequence = append(e.state.VersionSequence, newVersion) + e.lock.Unlock() + + return nil +} + +// signalAllWorkflows sends a signal to all tracked workflows. 
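+// Signal failures are logged as warnings and otherwise ignored, so this always returns nil.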
+func (e *versioningPinnedExecutor) signalAllWorkflows(ctx context.Context, info loadgen.ScenarioInfo, iteration int) error { + e.lock.Lock() + workflowIDs := make([]string, len(e.state.WorkflowIDs)) + copy(workflowIDs, e.state.WorkflowIDs) + e.lock.Unlock() + + var wg sync.WaitGroup + errChan := make(chan error, len(workflowIDs)) + + for _, workflowID := range workflowIDs { + wg.Add(1) + go func(wfID string) { + defer wg.Done() + + // Send a signal that executes a simple action + signalAction := &kitchensink.DoSignal{ + Variant: &kitchensink.DoSignal_DoSignalActions_{ + DoSignalActions: &kitchensink.DoSignal_DoSignalActions{ + Variant: &kitchensink.DoSignal_DoSignalActions_DoActions{ + DoActions: kitchensink.SingleActionSet( + // Execute a noop activity as part of signal processing + kitchensink.GenericActivity("noop", kitchensink.DefaultLocalActivity), + ), + }, + }, + }, + } + + err := info.Client.SignalWorkflow( + ctx, + wfID, + "", + "do_signal", + signalAction, + ) + if err != nil { + // As per requirements, we ignore signal failures + info.Logger.Warnf("Signal failed for workflow %s: %v", wfID, err) + } + }(workflowID) + } + + wg.Wait() + close(errChan) + + return nil +} + +// Verify checks that each workflow's build ID always moved forward and never backward. +func (e *versioningPinnedExecutor) Verify(ctx context.Context, info loadgen.ScenarioInfo) []error { + e.lock.Lock() + workflowIDs := make([]string, len(e.state.WorkflowIDs)) + copy(workflowIDs, e.state.WorkflowIDs) + e.lock.Unlock() + + var errors []error + var errorsMutex sync.Mutex + + // Check each workflow's history + var wg sync.WaitGroup + for _, workflowID := range workflowIDs { + wg.Add(1) + go func(wfID string) { + defer wg.Done() + + violations := e.checkWorkflowHistory(ctx, info, wfID) + if len(violations) > 0 { + errorsMutex.Lock() + errors = append(errors, violations...) + errorsMutex.Unlock() + } + }(workflowID) + } + + wg.Wait() + + if len(errors) == 0 { + info.Logger.Infof("Verification passed: All %d workflows maintained forward-only version progression", len(workflowIDs)) + } else { + info.Logger.Errorf("Verification failed: Found %d version progression violations", len(errors)) + } + + return errors +} + +// checkWorkflowHistory checks a workflow's versioning info for build ID violations. 
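+// Build IDs are collected from WorkflowTaskStarted and ActivityTaskStarted events,
+// mapped back to version numbers via the recorded version sequence, and any unknown
+// build ID or backward transition is reported as an error.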
+func (e *versioningPinnedExecutor) checkWorkflowHistory(ctx context.Context, info loadgen.ScenarioInfo, workflowID string) []error { + var errors []error + + // Get workflow execution description to access versioning info (with retry) + var describeResp *workflowservice.DescribeWorkflowExecutionResponse + var derr error + _ = loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + describeResp, derr = info.Client.WorkflowService().DescribeWorkflowExecution(ctx, &workflowservice.DescribeWorkflowExecutionRequest{ + Namespace: info.Namespace, + Execution: &commonpb.WorkflowExecution{ + WorkflowId: workflowID, + }, + }) + return derr == nil, derr + }) + if derr != nil { + errors = append(errors, fmt.Errorf("workflow %s: failed to describe execution: %w", workflowID, derr)) + return errors + } + + versioningInfo := describeResp.WorkflowExecutionInfo.GetVersioningInfo() + if versioningInfo == nil { + errors = append(errors, fmt.Errorf("workflow %s: no versioning info found", workflowID)) + return errors + } + + // Get workflow history to track build ID sequence + historyIter := info.Client.GetWorkflowHistory( + ctx, + workflowID, + "", + false, + enums.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT, + ) + + var buildIDSequence []string + buildIDVersionMap := make(map[string]int) // Map build IDs to version numbers + + // Parse version numbers from version sequence + e.lock.Lock() + for _, version := range e.state.VersionSequence { + var versionNum int + fmt.Sscanf(version, "%d", &versionNum) + buildIDVersionMap[version] = versionNum + } + e.lock.Unlock() + + // Iterate through history events to track build ID progression + // Use Started events (not deprecated) instead of Completed events + for historyIter.HasNext() { + var event *historypb.HistoryEvent + if err := loadgen.RetryUntilCtx(ctx, func(ctx context.Context) (bool, error) { + var err error + event, err = historyIter.Next() + if err != nil { + return false, err + } + return true, nil + }); err != nil { + errors = append(errors, fmt.Errorf("workflow %s: failed to read history: %w", workflowID, err)) + return errors + } + + // Check for build ID in Started events (GetWorkerVersion on Started events is not deprecated) + var buildID string + switch event.EventType { + case enums.EVENT_TYPE_WORKFLOW_TASK_STARTED: + if event.GetWorkflowTaskStartedEventAttributes() != nil && + event.GetWorkflowTaskStartedEventAttributes().GetWorkerVersion() != nil { + buildID = event.GetWorkflowTaskStartedEventAttributes().GetWorkerVersion().GetBuildId() + } + case enums.EVENT_TYPE_ACTIVITY_TASK_STARTED: + if event.GetActivityTaskStartedEventAttributes() != nil && + event.GetActivityTaskStartedEventAttributes().GetWorkerVersion() != nil { + buildID = event.GetActivityTaskStartedEventAttributes().GetWorkerVersion().GetBuildId() + } + } + + // If we found a build ID, track it (avoid duplicates) + if buildID != "" { + if len(buildIDSequence) == 0 || buildIDSequence[len(buildIDSequence)-1] != buildID { + buildIDSequence = append(buildIDSequence, buildID) + } + } + } + + // Check that build IDs never moved backward + for i := 1; i < len(buildIDSequence); i++ { + prevBuildID := buildIDSequence[i-1] + currBuildID := buildIDSequence[i] + + prevVersion, prevExists := buildIDVersionMap[prevBuildID] + currVersion, currExists := buildIDVersionMap[currBuildID] + + if !prevExists { + errors = append(errors, fmt.Errorf( + "workflow %s: unknown build ID '%s' at position %d in sequence", + workflowID, prevBuildID, i-1)) + continue + } + + if !currExists { + errors = 
append(errors, fmt.Errorf( + "workflow %s: unknown build ID '%s' at position %d in sequence", + workflowID, currBuildID, i)) + continue + } + + if currVersion < prevVersion { + errors = append(errors, fmt.Errorf( + "workflow %s: build ID moved backward from %s (%d) to %s (%d) at history position %d", + workflowID, prevBuildID, prevVersion, currBuildID, currVersion, i)) + } + } + + return errors +} diff --git a/scenarios/versioning_pinned_workflows_test.go b/scenarios/versioning_pinned_workflows_test.go new file mode 100644 index 00000000..02f5f711 --- /dev/null +++ b/scenarios/versioning_pinned_workflows_test.go @@ -0,0 +1,114 @@ +package scenarios + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" + "go.temporal.io/api/enums/v1" + "go.uber.org/zap/zaptest" +) + +func TestVersioningPinnedWorkflows(t *testing.T) { + t.Parallel() + + runID := fmt.Sprintf("vpw-%d", time.Now().Unix()) + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(2*time.Minute)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: runID, + Configuration: loadgen.RunConfiguration{ + Iterations: 12, // 0 (start) + 11 iterations, will bump versions at 5 and 10 + }, + ScenarioOptions: map[string]string{ + NumWorkflowsFlag: "3", // Start 3 workflows + VersionBumpIntervalFlag: "5", // Bump every 5 iterations + InitialVersionFlag: "1", // Start with version 1 + }, + } + + t.Run("Run executor", func(t *testing.T) { + executor := newVersioningPinnedExecutor() + + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "Executor should complete successfully") + + executor.lock.Lock() + state := *executor.state + executor.lock.Unlock() + + require.Len(t, state.WorkflowIDs, 3, "Should have started 3 workflows") + require.Equal(t, "3", state.CurrentVersion, "Should have bumped to 3 (1->2 at iter 5, 2->3 at iter 10)") + require.Equal(t, []string{"1", "2", "3"}, state.VersionSequence, "Should track all versions") + }) + + t.Run("Verify checks build ID progression", func(t *testing.T) { + executor := newVersioningPinnedExecutor() + + // Run a simple scenario + shortScenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("vpw-verify-%d", time.Now().Unix()), + ExecutionID: "test-verify-exec-id", + Configuration: loadgen.RunConfiguration{ + Iterations: 7, // 0 (start) + 6 iterations, will bump at iteration 5 + }, + ScenarioOptions: map[string]string{ + NumWorkflowsFlag: "2", + VersionBumpIntervalFlag: "5", + InitialVersionFlag: "1", // Start with version 1 + }, + } + + _, err := env.RunExecutorTest(t, executor, shortScenarioInfo, clioptions.LangGo) + require.NoError(t, err, "Executor should complete successfully") + + // Now verify the workflows + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + + // Create a new scenario info for verification with proper client setup + verifyInfo := loadgen.ScenarioInfo{ + RunID: shortScenarioInfo.RunID, + ExecutionID: shortScenarioInfo.ExecutionID, + Client: env.TemporalClient(), + Logger: zaptest.NewLogger(t).Sugar(), + Namespace: "default", + } + + errors := executor.Verify(ctx, verifyInfo) + require.Empty(t, errors, "Verification should pass with no errors") + + executor.lock.Lock() + state := *executor.state + executor.lock.Unlock() + + require.Len(t, 
state.WorkflowIDs, 2) + require.Equal(t, "2", state.CurrentVersion, "Should have bumped to 2") + + // Verify we can read the workflow histories + for _, workflowID := range state.WorkflowIDs { + iter := env.TemporalClient().GetWorkflowHistory( + ctx, + workflowID, + "", + false, + enums.HISTORY_EVENT_FILTER_TYPE_ALL_EVENT, + ) + + eventCount := 0 + for iter.HasNext() { + _, err := iter.Next() + require.NoError(t, err) + eventCount++ + } + require.Greater(t, eventCount, 0, "Should have history events for workflow %s", workflowID) + } + }) +} diff --git a/scenarios/workflow_completion_checker_test.go b/scenarios/workflow_completion_checker_test.go new file mode 100644 index 00000000..82c37ef3 --- /dev/null +++ b/scenarios/workflow_completion_checker_test.go @@ -0,0 +1,47 @@ +package scenarios + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" +) + +// Test that WorkflowCompletionChecker is able to detect a stuck workflow. +// Uses the stuck_workflow scenario which has a workflow that blocks forever on iteration 1. +// The scenario's executor implements the Verifier interface, so env.RunExecutorTest +// automatically runs verification and reports errors. +func TestWorkflowCompletionChecker(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(5*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("stuck-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 10, + }, + } + + // Get the stuck_workflow scenario executor + scenario := loadgen.GetScenario("stuck_workflow") + require.NotNil(t, scenario, "stuck_workflow scenario should be registered") + executor := scenario.ExecutorFn() + + // RunExecutorTest will automatically run verification since the executor implements Verifier + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail due to stuck workflow and verification errors") + require.Contains(t, err.Error(), "deadline exceeded", "should report timed out iteration") + require.Contains(t, err.Error(), "non-completed workflow: Namespace=default, WorkflowID=w-stuck-", "should report stuck workflow from verifier") + + // Verify the executor state shows 9 completed iterations (all except the stuck one) + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 9, execState.CompletedIterations, "should complete 9 iterations (all except iteration 1 which is stuck)") +} diff --git a/scenarios/workflow_loop.go b/scenarios/workflow_loop.go new file mode 100644 index 00000000..0d2b3627 --- /dev/null +++ b/scenarios/workflow_loop.go @@ -0,0 +1,167 @@ +package scenarios + +import ( + "context" + "fmt" + "math/rand" + "time" + + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/loadgen/kitchensink" +) + +const ( + // LoopsFlag controls the number of activities to execute sequentially + LoopsFlag = "loops" + // MessageViaFlag controls whether to use signal, update, or random (default: "signal") + MessageViaFlag = "message-via" +) + +type workflowLoopExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier 
*loadgen.WorkflowCompletionVerifier +} + +func (e *workflowLoopExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + +func init() { + loadgen.MustRegisterScenario(loadgen.Scenario{ + Description: fmt.Sprintf("Creates n activities sequentially, each sends one signal or update back to the workflow. "+ + "The workflow waits for each signal/update before proceeding. "+ + "Use --option %s= to set the count (default: 1). "+ + "Use --option %s= to choose mechanism (default: signal).", + LoopsFlag, MessageViaFlag), + ExecutorFn: func() loadgen.Executor { + return &workflowLoopExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{}, + }, + PrepareTestInput: func(ctx context.Context, info loadgen.ScenarioInfo, params *kitchensink.TestInput) error { + activityCount := info.ScenarioOptionInt(LoopsFlag, 1) + if activityCount <= 0 { + return fmt.Errorf("%s must be positive, got %d", LoopsFlag, activityCount) + } + + messageVia := info.ScenarioOptions[MessageViaFlag] + if messageVia == "" { + messageVia = "signal" + } + if messageVia != "signal" && messageVia != "update" && messageVia != "random" { + return fmt.Errorf("%s must be 'signal', 'update', or 'random', got '%s'", MessageViaFlag, messageVia) + } + + info.Logger.Infof("Preparing workflow loop with %d iterations using message-via=%s", activityCount, messageVia) + + // Create actions for the workflow + var actions []*kitchensink.Action + + // Use a single state variable "loop-index" that tracks the current index + // This ensures signals/updates are processed consecutively in order + const stateKey = "loop-index" + + // For each iteration, create a sequential action that: + // 1. Executes an activity that sends a signal or update back to the workflow + // 2. 
Waits for the workflow state to be set to the current index by that signal/update + for i := 0; i < activityCount; i++ { + stateValue := fmt.Sprintf("%d", i) + + // Determine if we use update for this iteration + var useUpdate bool + if messageVia == "random" { + // Pick randomly between signal and update + useUpdate = rand.Intn(2) == 1 + } else { + useUpdate = messageVia == "update" + } + + // Create the client action that will be executed + var clientAction *kitchensink.ClientAction + if useUpdate { + // Use update + clientAction = &kitchensink.ClientAction{ + Variant: &kitchensink.ClientAction_DoUpdate{ + DoUpdate: &kitchensink.DoUpdate{ + Variant: &kitchensink.DoUpdate_DoActions{ + DoActions: &kitchensink.DoActionsUpdate{ + Variant: &kitchensink.DoActionsUpdate_DoActions{ + DoActions: &kitchensink.ActionSet{ + Actions: []*kitchensink.Action{ + kitchensink.NewSetWorkflowStateAction(stateKey, stateValue), + }, + }, + }, + }, + }, + }, + }, + } + } else { + // Use signal + clientAction = &kitchensink.ClientAction{ + Variant: &kitchensink.ClientAction_DoSignal{ + DoSignal: &kitchensink.DoSignal{ + Variant: &kitchensink.DoSignal_DoSignalActions_{ + DoSignalActions: &kitchensink.DoSignal_DoSignalActions{ + Variant: &kitchensink.DoSignal_DoSignalActions_DoActions{ + DoActions: &kitchensink.ActionSet{ + Actions: []*kitchensink.Action{ + kitchensink.NewSetWorkflowStateAction(stateKey, stateValue), + }, + }, + }, + }, + }, + }, + }, + } + } + + // Execute an activity that performs the client action (sends signal/update) + // This activity will use the Temporal client to send the signal/update back to the workflow + actions = append(actions, kitchensink.ClientActivity( + kitchensink.ClientActions(clientAction), + kitchensink.DefaultRemoteActivity, + )) + + // Wait for the workflow state to be set to the current index by the signal/update + // This ensures signals/updates are processed consecutively in order (0, 1, 2, ...) + actions = append(actions, kitchensink.NewAwaitWorkflowStateAction(stateKey, stateValue)) + } + + // Add final return action + actions = append(actions, kitchensink.NewEmptyReturnResultAction()) + + // Set the actions as sequential (not concurrent) + params.WorkflowInput.InitialActions = []*kitchensink.ActionSet{ + { + Actions: actions, + Concurrent: false, + }, + } + + return nil + }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*workflowLoopExecutor) + if e.completionVerifier == nil { + return nil + } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) + }, + }) +} diff --git a/scenarios/workflow_loop_test.go b/scenarios/workflow_loop_test.go new file mode 100644 index 00000000..a6b1a201 --- /dev/null +++ b/scenarios/workflow_loop_test.go @@ -0,0 +1,249 @@ +package scenarios + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/require" + "github.com/temporalio/omes/cmd/clioptions" + "github.com/temporalio/omes/loadgen" + "github.com/temporalio/omes/workers" +) + +// TestWorkflowLoopScenario tests the workflow_loop scenario with default settings (1 activity). 
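+// Per loop, the scenario schedules a client activity that sends a signal carrying a
+// SetWorkflowState action for the shared "loop-index" key, then blocks on an
+// AwaitWorkflowState action for that same key, so the workflow only advances once each
+// message has been processed in order (see workflow_loop.go).
+//
+// A roughly equivalent standalone run, as a sketch (the exact command and flag names
+// are assumed here rather than taken from this diff):
+//
+//	go run ./cmd run-scenario-with-worker --scenario workflow_loop --language go \
+//	  --iterations 3 --option loops=1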
+func TestWorkflowLoopScenario(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(60*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 3, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "1", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + require.NotNil(t, scenario.VerifyFn, "workflow_loop scenario should have a VerifyFn") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 3, execState.CompletedIterations, "should complete 3 iterations") +} + +// TestWorkflowLoopScenarioMultipleActivities tests the workflow_loop scenario with multiple activities. +func TestWorkflowLoopScenarioMultipleActivities(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-multi-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 2, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "5", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with 5 activities") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations, "should complete 2 iterations") +} + +// TestWorkflowLoopScenarioVerification tests that the VerifyFn properly validates workflow completion. 
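+// Verification here is wired through the scenario's VerifyFn, which delegates to the
+// WorkflowCompletionVerifier created in workflowLoopExecutor.Run (see workflow_loop.go)
+// and inspects the kitchen-sink executor's recorded state for workflows that never
+// reached completion.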
+func TestWorkflowLoopScenarioVerification(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-verify-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 5, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "3", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor and verification + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "should complete successfully and pass verification") + + // Verify the executor state shows correct number of completed iterations + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 5, execState.CompletedIterations, "should complete all 5 iterations") +} + +// TestWorkflowLoopScenarioInvalidConfig tests that invalid configuration is rejected. +func TestWorkflowLoopScenarioInvalidConfig(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(10*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-invalid-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 1, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "0", // Invalid: must be positive + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor - should fail due to invalid configuration + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.Error(t, err, "should fail with invalid activity count") + require.Contains(t, err.Error(), "must be positive", "error should mention positive requirement") +} + +// TestWorkflowLoopScenarioWithUpdates tests the workflow_loop scenario using updates instead of signals. +func TestWorkflowLoopScenarioWithUpdates(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-update-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 2, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "3", + MessageViaFlag: "update", // Use updates instead of signals + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with updates") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations, "should complete 2 iterations using updates") +} + +// TestWorkflowLoopScenarioWithUpdatesMultipleIterations tests updates with more iterations. 
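+// With message-via=update, each SetWorkflowState action is delivered through a
+// DoUpdate/DoActions client action rather than a signal (see the update branch in
+// workflow_loop.go); the ordered "loop-index" handshake is otherwise the same.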
+func TestWorkflowLoopScenarioWithUpdatesMultipleIterations(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-update-multi-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 4, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "2", + MessageViaFlag: "update", + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with multiple iterations using updates") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 4, execState.CompletedIterations, "should complete 4 iterations") +} + +// TestWorkflowLoopScenarioWithRandomSignalAndUpdate tests random selection between signals and updates. +func TestWorkflowLoopScenarioWithRandomSignalAndUpdate(t *testing.T) { + t.Parallel() + + env := workers.SetupTestEnvironment(t, + workers.WithExecutorTimeout(30*time.Second)) + + scenarioInfo := loadgen.ScenarioInfo{ + RunID: fmt.Sprintf("loop-random-via-%d", time.Now().Unix()), + Configuration: loadgen.RunConfiguration{ + Iterations: 2, + }, + ScenarioOptions: map[string]string{ + LoopsFlag: "5", + MessageViaFlag: "random", // Randomly pick between signal and update + }, + } + + // Get the workflow_loop scenario + scenario := loadgen.GetScenario("workflow_loop") + require.NotNil(t, scenario, "workflow_loop scenario should be registered") + + executor := scenario.ExecutorFn() + + // Run the executor + _, err := env.RunExecutorTest(t, executor, scenarioInfo, clioptions.LangGo) + require.NoError(t, err, "executor should complete successfully with random signal/update") + + // Verify the executor state + resumable, ok := executor.(loadgen.Resumable) + require.True(t, ok, "executor should implement Resumable interface") + execState := resumable.Snapshot().(loadgen.ExecutorState) + require.Equal(t, 2, execState.CompletedIterations, "should complete 2 iterations with random via") +} diff --git a/scenarios/workflow_on_many_task_queues.go b/scenarios/workflow_on_many_task_queues.go index e838832c..dbd786f3 100644 --- a/scenarios/workflow_on_many_task_queues.go +++ b/scenarios/workflow_on_many_task_queues.go @@ -3,39 +3,67 @@ package scenarios import ( "context" "fmt" + "time" "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" ) +type manyTaskQueuesExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *manyTaskQueuesExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow on one of the task queues. 
" + "Workers must be started with --task-queue-suffix-index-end as one less than task queue count here. " + "Additional options: task-queue-count (required).", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ - TestInput: &kitchensink.TestInput{ - WorkflowInput: &kitchensink.WorkflowInput{ - InitialActions: []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), + return &manyTaskQueuesExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, }, }, + PrepareTestInput: func(ctx context.Context, opts loadgen.ScenarioInfo, params *kitchensink.TestInput) error { + // Require task queue count + if opts.ScenarioOptionInt("task-queue-count", 0) == 0 { + return fmt.Errorf("task-queue-count option required") + } + return nil + }, + UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { + // Add suffix to the task queue based on modulus of iteration + options.StartOptions.TaskQueue += + fmt.Sprintf("-%v", run.Iteration%run.ScenarioInfo.ScenarioOptionInt("task-queue-count", 0)) + return nil + }, }, - PrepareTestInput: func(ctx context.Context, opts loadgen.ScenarioInfo, params *kitchensink.TestInput) error { - // Require task queue count - if opts.ScenarioOptionInt("task-queue-count", 0) == 0 { - return fmt.Errorf("task-queue-count option required") - } - return nil - }, - UpdateWorkflowOptions: func(ctx context.Context, run *loadgen.Run, options *loadgen.KitchenSinkWorkflowOptions) error { - // Add suffix to the task queue based on modulus of iteration - options.StartOptions.TaskQueue += - fmt.Sprintf("-%v", run.Iteration%run.ScenarioInfo.ScenarioOptionInt("task-queue-count", 0)) - return nil - }, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*manyTaskQueuesExecutor) + if e.completionVerifier == nil { + return nil + } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) + }, }) } diff --git a/scenarios/workflow_with_many_actions.go b/scenarios/workflow_with_many_actions.go index 2325c18b..d142f281 100644 --- a/scenarios/workflow_with_many_actions.go +++ b/scenarios/workflow_with_many_actions.go @@ -2,21 +2,41 @@ package scenarios import ( "context" + "strconv" + "time" + "go.temporal.io/api/common/v1" "go.temporal.io/sdk/converter" "google.golang.org/protobuf/types/known/durationpb" - "strconv" "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" ) +type manyActionsExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *manyActionsExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow with a number of child workflows and/or activities. 
" + "Additional options: children-per-workflow (default 30), activities-per-workflow (default 30).", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ + return &manyActionsExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ TestInput: &kitchensink.TestInput{ WorkflowInput: &kitchensink.WorkflowInput{ InitialActions: []*kitchensink.ActionSet{}, @@ -85,7 +105,16 @@ func init() { ) return nil }, + }, + } + }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + e := executor.(*manyActionsExecutor) + if e.completionVerifier == nil { + return nil } + state := e.KitchenSinkExecutor.GetState() + return e.completionVerifier.VerifyRun(ctx, info, state) }, }) } diff --git a/scenarios/workflow_with_single_noop_activity.go b/scenarios/workflow_with_single_noop_activity.go index f859aad1..9194bbd6 100644 --- a/scenarios/workflow_with_single_noop_activity.go +++ b/scenarios/workflow_with_single_noop_activity.go @@ -1,23 +1,54 @@ package scenarios import ( + "context" + "time" + "github.com/temporalio/omes/loadgen" "github.com/temporalio/omes/loadgen/kitchensink" ) +type noopActivityExecutor struct { + *loadgen.KitchenSinkExecutor + completionVerifier *loadgen.WorkflowCompletionVerifier +} + +func (e *noopActivityExecutor) Run(ctx context.Context, info loadgen.ScenarioInfo) error { + // Create completion verifier + verifier, err := loadgen.NewWorkflowCompletionChecker(ctx, info, 30*time.Second) + if err != nil { + return err + } + e.completionVerifier = verifier + + // Run the kitchen sink executor + return e.KitchenSinkExecutor.Run(ctx, info) +} + func init() { loadgen.MustRegisterScenario(loadgen.Scenario{ Description: "Each iteration executes a single workflow with a noop activity.", ExecutorFn: func() loadgen.Executor { - return loadgen.KitchenSinkExecutor{ - TestInput: &kitchensink.TestInput{ - WorkflowInput: &kitchensink.WorkflowInput{ - InitialActions: []*kitchensink.ActionSet{ - kitchensink.NoOpSingleActivityActionSet(), + return &noopActivityExecutor{ + KitchenSinkExecutor: &loadgen.KitchenSinkExecutor{ + TestInput: &kitchensink.TestInput{ + WorkflowInput: &kitchensink.WorkflowInput{ + InitialActions: []*kitchensink.ActionSet{ + kitchensink.NoOpSingleActivityActionSet(), + }, }, }, }, } }, + VerifyFn: func(ctx context.Context, info loadgen.ScenarioInfo, executor loadgen.Executor) []error { + // e := executor.(*noopActivityExecutor) + // if e.completionVerifier == nil { + // return nil + // } + // state := e.KitchenSinkExecutor.GetState() + // return e.completionVerifier.VerifyRun(ctx, info, state) + return nil + }, }) } diff --git a/versions.env b/versions.env index 04ff047c..91a863e9 100644 --- a/versions.env +++ b/versions.env @@ -14,4 +14,4 @@ DOTNET_SDK_VERSION=1.9.0 GO_SDK_VERSION=1.37.0 JAVA_SDK_VERSION=1.31.0 PYTHON_SDK_VERSION=1.19.0 -TYPESCRIPT_SDK_VERSION=1.12.1 +TYPESCRIPT_SDK_VERSION=1.13.2 diff --git a/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs b/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs index 5cf1286c..1b7c682d 100644 --- a/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs +++ b/workers/dotnet/Temporalio.Omes/KitchenSinkWorkflow.cs @@ -412,17 +412,11 @@ private static Temporalio.Common.RetryPolicy RetryPolicyFromProto(RetryPolicy pr private static Temporalio.Common.Priority PriorityFromProto(ExecuteActivityAction eaa) { - if (eaa.FairnessKey != null) + return new Temporalio.Common.Priority { - throw new 
ApplicationFailureException("FairnessKey is not supported yet"); - } - if (eaa.FairnessWeight > 0) - { - throw new ApplicationFailureException("FairnessWeight is not supported yet"); - } - return new() - { - PriorityKey = eaa.Priority.PriorityKey + PriorityKey = eaa.Priority.PriorityKey, + FairnessKey = !string.IsNullOrEmpty(eaa.FairnessKey) ? eaa.FairnessKey : null, + FairnessWeight = eaa.FairnessWeight > 0 ? eaa.FairnessWeight : null, }; } diff --git a/workers/go/ebbandflow/activities.go b/workers/go/ebbandflow/activities.go deleted file mode 100644 index 5295bce5..00000000 --- a/workers/go/ebbandflow/activities.go +++ /dev/null @@ -1,31 +0,0 @@ -package ebbandflow - -import ( - "context" - "time" - - "github.com/temporalio/omes/loadgen/kitchensink" - "go.temporal.io/sdk/activity" -) - -type Activities struct{} - -type ActivityExecutionResult struct { - ScheduledTime time.Time `json:"scheduledTime"` - ActualStartTime time.Time `json:"actualStartTime"` -} - -func (a Activities) MeasureLatencyActivity( - ctx context.Context, - activityAction *kitchensink.ExecuteActivityAction, -) (ActivityExecutionResult, error) { - if delay := activityAction.GetDelay(); delay != nil { - time.Sleep(delay.AsDuration()) - } - - activityInfo := activity.GetInfo(ctx) - return ActivityExecutionResult{ - ScheduledTime: activityInfo.ScheduledTime, - ActualStartTime: activityInfo.StartedTime, - }, nil -} diff --git a/workers/go/ebbandflow/workflow.go b/workers/go/ebbandflow/workflow.go deleted file mode 100644 index fc67fe91..00000000 --- a/workers/go/ebbandflow/workflow.go +++ /dev/null @@ -1,87 +0,0 @@ -package ebbandflow - -import ( - "fmt" - "math/rand" - "sync" - "time" - - "github.com/temporalio/omes/loadgen/ebbandflow" - "github.com/temporalio/omes/workers/go/workflowutils" - "go.temporal.io/sdk/temporal" - "go.temporal.io/sdk/workflow" -) - -var activityStub = Activities{} - -// EbbAndFlowTrackWorkflow executes activities and returns their schedule-to-start times with fairness data -func EbbAndFlowTrackWorkflow(ctx workflow.Context, params *ebbandflow.WorkflowParams) (*ebbandflow.WorkflowOutput, error) { - rng := rand.New(rand.NewSource(workflow.Now(ctx).UnixNano())) - activities := params.SleepActivities.Sample(rng) - - if len(activities) == 0 { - return &ebbandflow.WorkflowOutput{Timings: []ebbandflow.ActivityTiming{}}, nil - } - - var results []ebbandflow.ActivityTiming - var resultsMutex sync.Mutex - - var activityFuncs []func(workflow.Context) error - for _, activity := range activities { - activityFuncs = append(activityFuncs, func(ctx workflow.Context) error { - // Set up activity options - opts := workflow.ActivityOptions{ - StartToCloseTimeout: 1 * time.Minute, - RetryPolicy: &temporal.RetryPolicy{}, - } - - // Set priority, if specified - if activity.Priority != nil { - opts.Priority.PriorityKey = int(activity.Priority.PriorityKey) - } - - // Set fairness, if specified - fairnessKey := activity.GetFairnessKey() - fairnessWeight := activity.GetFairnessWeight() - if fairnessKey != "" { - opts.Priority.FairnessKey = fairnessKey - opts.Priority.FairnessWeight = fairnessWeight - } - - // Execute activity - var activityResult ActivityExecutionResult - actCtx := workflow.WithActivityOptions(ctx, opts) - err := workflow.ExecuteActivity(actCtx, activityStub.MeasureLatencyActivity, activity).Get(ctx, &activityResult) - if err != nil { - workflow.GetLogger(ctx).Error("Activity execution failed", "error", err) - return err - } - - // Calculate 
schedule-to-start time using accurate activity timing - scheduleToStartMS := activityResult.ActualStartTime.Sub(activityResult.ScheduledTime) - - result := ebbandflow.ActivityTiming{ - ScheduleToStart: scheduleToStartMS, - } - - // Thread-safe append to results - resultsMutex.Lock() - results = append(results, result) - resultsMutex.Unlock() - - return nil - }) - } - - err := workflowutils.RunConcurrently(ctx, activityFuncs...) - if err != nil { - workflow.GetLogger(ctx).Error("Failed to execute activities concurrently", "error", err) - } - - // Check if all activities failed - if len(results) == 0 { - return nil, fmt.Errorf("failed to start any of the %d activities", len(activities)) - } - - return &ebbandflow.WorkflowOutput{Timings: results}, nil -} diff --git a/workers/go/go.sum b/workers/go/go.sum index e8fbc88a..ff9d801f 100644 --- a/workers/go/go.sum +++ b/workers/go/go.sum @@ -54,8 +54,6 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/temporalio/features v0.0.0-20251113235102-ac7c92445a59 h1:+k/VNVoVeoe1rvX+9qoedknC5UkdA3BbL8TFKWtaZMU= -github.com/temporalio/features v0.0.0-20251113235102-ac7c92445a59/go.mod h1:Ew0bBvTHCGcs2fX+iyoUqoj78x5eS7BznJTPg8wd35I= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= diff --git a/workers/go/kitchensink/kitchen_sink.go b/workers/go/kitchensink/kitchen_sink.go index f78244f4..186a28d8 100644 --- a/workers/go/kitchensink/kitchen_sink.go +++ b/workers/go/kitchensink/kitchen_sink.go @@ -386,10 +386,10 @@ func launchActivity(ctx workflow.Context, act *kitchensink.ExecuteActivityAction priority.PriorityKey = int(prio.PriorityKey) } if fk := act.GetFairnessKey(); fk != "" { - return fmt.Errorf("fairness key is not supported yet") + priority.FairnessKey = fk } if fw := act.GetFairnessWeight(); fw > 0 { - return fmt.Errorf("fairness weight is not supported yet") + priority.FairnessWeight = fw } opts := workflow.ActivityOptions{ diff --git a/workers/go/worker/worker.go b/workers/go/worker/worker.go index 81d2595a..4b53a558 100644 --- a/workers/go/worker/worker.go +++ b/workers/go/worker/worker.go @@ -6,7 +6,6 @@ import ( "github.com/nexus-rpc/sdk-go/nexus" "github.com/spf13/cobra" "github.com/temporalio/omes/cmd/clioptions" - "github.com/temporalio/omes/workers/go/ebbandflow" "github.com/temporalio/omes/workers/go/kitchensink" "go.temporal.io/sdk/activity" "go.temporal.io/sdk/client" @@ -68,7 +67,6 @@ func makePollerBehavior(simple, auto int) worker.PollerBehavior { func runWorkers(client client.Client, taskQueues []string, options clioptions.WorkerOptions) error { errCh := make(chan error, len(taskQueues)) - ebbFlowActivities := ebbandflow.Activities{} clientActivities := kitchensink.ClientActivities{ Client: client, } @@ -103,8 +101,6 @@ func runWorkers(client client.Client, 
taskQueues []string, options clioptions.Wo w.RegisterActivityWithOptions(clientActivities.ExecuteClientActivity, activity.RegisterOptions{Name: "client"}) w.RegisterWorkflow(kitchensink.EchoWorkflow) w.RegisterWorkflow(kitchensink.WaitForCancelWorkflow) - w.RegisterWorkflowWithOptions(ebbandflow.EbbAndFlowTrackWorkflow, workflow.RegisterOptions{Name: "ebbAndFlowTrack"}) - w.RegisterActivity(&ebbFlowActivities) w.RegisterNexusService(service) errCh <- w.Run(worker.InterruptCh()) }() diff --git a/workers/java/.classpath b/workers/java/.classpath new file mode 100644 index 00000000..9f95f67b --- /dev/null +++ b/workers/java/.classpath @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/workers/java/.settings/org.eclipse.buildship.core.prefs b/workers/java/.settings/org.eclipse.buildship.core.prefs new file mode 100644 index 00000000..b78a8101 --- /dev/null +++ b/workers/java/.settings/org.eclipse.buildship.core.prefs @@ -0,0 +1,13 @@ +arguments=--init-script /var/folders/4w/5qdjw8sd6417nldg5pvhs_rr0000gn/T/db3b08fc4a9ef609cb16b96b200fa13e563f396e9bb1ed0905fdab7bc3bc513b.gradle +auto.sync=false +build.scans.enabled=false +connection.gradle.distribution=GRADLE_DISTRIBUTION(WRAPPER) +connection.project.dir= +eclipse.preferences.version=1 +gradle.user.home= +java.home=/Users/stephan/.local/share/mise/installs/java/21.0.2 +jvm.arguments= +offline.mode=false +override.workspace.settings=true +show.console.view=true +show.executions.view=true diff --git a/workers/java/.settings/org.eclipse.jdt.core.prefs b/workers/java/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000..ee4d5dd0 --- /dev/null +++ b/workers/java/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=10 +org.eclipse.jdt.core.compiler.compliance=10 +org.eclipse.jdt.core.compiler.source=10 diff --git a/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java b/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java index af30c1e9..808f0cc6 100644 --- a/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java +++ b/workers/java/io/temporal/omes/KitchenSinkWorkflowImpl.java @@ -357,10 +357,10 @@ private void launchActivity(KitchenSink.ExecuteActivityAction executeActivity) { prio.setPriorityKey(priority.getPriorityKey()); } if (executeActivity.getFairnessKey() != "") { - throw new IllegalArgumentException("FairnessKey is not supported"); + prio.setFairnessKey(executeActivity.getFairnessKey()); } if (executeActivity.getFairnessWeight() > 0) { - throw new IllegalArgumentException("FairnessWeight is not supported"); + prio.setFairnessWeight(executeActivity.getFairnessWeight()); } if (executeActivity.hasIsLocal()) { diff --git a/workers/python/kitchen_sink.py b/workers/python/kitchen_sink.py index 56398a52..15a66478 100644 --- a/workers/python/kitchen_sink.py +++ b/workers/python/kitchen_sink.py @@ -233,10 +233,25 @@ def launch_activity(execute_activity: ExecuteActivityAction) -> ActivityHandle: # TODO: cancel type can be in local ) else: - if execute_activity.HasField("priority"): - raise NotImplementedError("priority is not supported yet") + priority = None + if ( + execute_activity.HasField("priority") + or execute_activity.fairness_key + or execute_activity.fairness_weight > 0 + ): + priority = Priority( # type: ignore[call-arg] + priority_key=execute_activity.priority.priority_key + if execute_activity.HasField("priority") + else 0, + fairness_key=execute_activity.fairness_key # type: ignore[call-arg] + if 
execute_activity.fairness_key + else None, + fairness_weight=execute_activity.fairness_weight # type: ignore[call-arg] + if execute_activity.fairness_weight > 0 + else None, + ) - activity_task = workflow.start_activity( + activity_task = workflow.start_activity( # type: ignore[misc] activity=act_type, args=args, task_queue=execute_activity.task_queue, @@ -256,6 +271,7 @@ def launch_activity(execute_activity: ExecuteActivityAction) -> ActivityHandle: cancellation_type=convert_act_cancel_type( execute_activity.remote.cancellation_type ), + priority=priority, # type: ignore[arg-type] ) return activity_task diff --git a/workers/run.go b/workers/run.go index 13ba36d0..a7526b97 100644 --- a/workers/run.go +++ b/workers/run.go @@ -190,10 +190,17 @@ func passthrough(fs *pflag.FlagSet, prefix string) (flags []string) { if !f.Changed { return } - flags = append(flags, fmt.Sprintf("--%s=%s", - strings.TrimPrefix(f.Name, prefix), - f.Value.String(), - )) + + flagName := strings.TrimPrefix(f.Name, prefix) + + if f.Value.Type() == "bool" { + // Some SDKs like Python don't like `--tls=true` + if f.Value.String() == "true" { + flags = append(flags, fmt.Sprintf("--%s", flagName)) + } + } else { + flags = append(flags, fmt.Sprintf("--%s=%s", flagName, f.Value.String())) + } }) return } diff --git a/workers/test_env.go b/workers/test_env.go index 9663b2b3..3ba268b2 100644 --- a/workers/test_env.go +++ b/workers/test_env.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "reflect" "testing" "time" @@ -96,6 +97,14 @@ func SetupTestEnvironment(t *testing.T, opts ...TestEnvOption) *TestEnvironment LogLevel: "error", Stdout: &logWriter{logger: serverLogger}, Stderr: &logWriter{logger: serverLogger}, + ExtraArgs: []string{ + "--search-attribute", "OmesExecutionID=Keyword", + "--search-attribute", "KS_Int=Int", + "--search-attribute", "KS_Keyword=Keyword", + "--dynamic-config-value", "frontend.workerVersioningDataAPIs=true", + "--dynamic-config-value", "frontend.workerVersioningWorkflowAPIs=true", + "--dynamic-config-value", "frontend.workerVersioningRuleAPIs=true", + }, }) require.NoError(t, err, "Failed to start dev server") @@ -163,7 +172,7 @@ func (env *TestEnvironment) createNexusEndpoint(ctx context.Context, taskQueueNa return endpointName, nil } -// RunExecutorTest runs an executor with a specific SDK and server address +// RunExecutorTest runs an executor with a specific SDK and server address. func (env *TestEnvironment) RunExecutorTest( t *testing.T, executor loadgen.Executor, @@ -194,6 +203,48 @@ func (env *TestEnvironment) RunExecutorTest( execErr := executor.Run(testCtx, scenarioInfo) + // Run verification if executor implements Verifier interface. + // Use a fresh context for verification, not the executor context which may be canceled. 
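+	// The snapshot handling below assumes the executor's snapshot is either a
+	// loadgen.ExecutorState value or a struct carrying one in a field named
+	// "ExecutorState" (possibly stored behind an interface); any other shape is
+	// skipped and verification is not run for that executor.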
+ var verifyErrs []error + if verifier, ok := executor.(loadgen.Verifier); ok { + if stateful, hasSnapshot := executor.(interface{ Snapshot() any }); hasSnapshot { + snapshot := stateful.Snapshot() + // Try to extract ExecutorState from the snapshot + var execState loadgen.ExecutorState + var hasState bool + + switch s := snapshot.(type) { + case loadgen.ExecutorState: + execState = s + hasState = true + default: + // For custom state types with an ExecutorState field + v := reflect.ValueOf(snapshot) + if v.Kind() == reflect.Struct { + if field := v.FieldByName("ExecutorState"); field.IsValid() { + if es, ok := field.Interface().(loadgen.ExecutorState); ok { + execState = es + hasState = true + } else if field.Kind() == reflect.Interface && !field.IsNil() { + // Handle case where ExecutorState is stored as interface{} + if es, ok := field.Elem().Interface().(loadgen.ExecutorState); ok { + execState = es + hasState = true + } + } + } + } + } + + if hasState { + // Create a fresh context for verification with appropriate timeout + verifyCtx, cancelVerify := context.WithTimeout(t.Context(), env.executorTimeout) + defer cancelVerify() + verifyErrs = verifier.VerifyRun(verifyCtx, scenarioInfo, execState) + } + } + } + // Trigger worker shutdown. cancelTestCtx() @@ -208,8 +259,9 @@ func (env *TestEnvironment) RunExecutorTest( workerErr = fmt.Errorf("timed out waiting for worker shutdown") } - return TestResult{ObservedLogs: observedLogs}, - errors.Join(execErr, workerErr) + // Combine all errors + allErrs := append([]error{execErr, workerErr}, verifyErrs...) + return TestResult{ObservedLogs: observedLogs}, errors.Join(allErrs...) } func (env *TestEnvironment) buildDirName() string { diff --git a/workers/test_workers.go b/workers/test_workers.go index 9f4bdcfc..e72251a1 100644 --- a/workers/test_workers.go +++ b/workers/test_workers.go @@ -114,6 +114,10 @@ func (w *workerPool) startWorker( PreparedLogger: logger.Named(fmt.Sprintf("%s-worker", sdk)), }, } + // Configure build ID for versioning if specified in scenario options + if buildID, ok := scenarioInfo.ScenarioOptions["worker-build-id"]; ok && buildID != "" { + runner.WorkerOptions.FlagSet("worker-").Set("worker-build-id", buildID) + } runner.ClientOptions.FlagSet().Set("server-address", w.env.DevServerAddress()) runner.ClientOptions.FlagSet().Set("namespace", testNamespace) workerDone <- runner.Run(ctx, baseDir) diff --git a/workers/typescript/package-lock.json b/workers/typescript/package-lock.json index b158d5fc..6f801d9e 100644 --- a/workers/typescript/package-lock.json +++ b/workers/typescript/package-lock.json @@ -8,10 +8,10 @@ "name": "omes", "version": "0.1.0", "dependencies": { - "@temporalio/activity": "^1.12.1", - "@temporalio/client": "^1.12.1", - "@temporalio/worker": "^1.12.1", - "@temporalio/workflow": "^1.12.1", + "@temporalio/activity": "^1.13.2", + "@temporalio/client": "^1.13.2", + "@temporalio/worker": "^1.13.2", + "@temporalio/workflow": "^1.13.2", "commander": "^11.1.0", "long": "^5.2.3", "winston": "^3.11.0" @@ -652,12 +652,13 @@ "integrity": "sha512-myfUej5naTBWnqOCc/MdVOLVjXUXtIA+NpDrDBKJtLLg2shUjBu3cZmB/85RyitKc55+lUUyl7oRfLOvkr2hsw==" }, "node_modules/@temporalio/activity": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/activity/-/activity-1.12.1.tgz", - "integrity": "sha512-EPPIR5J0A6OxWTr5HGyeM2Lwh3US8S73N3ZFelCPaJwOq2Fh7qrLiwYp2wCwGYhhYI9Xppo3xE45MWUxayBa3Q==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/activity/-/activity-1.13.2.tgz", + 
"integrity": "sha512-Mp0pAGNKGeIlZEy6ToLCt1gJdrumu64xHF1yAc1gsOVeqo4a3ISGFbCSpM56bokwtj9jpFK/Z1f3zCFnif2ogg==", "license": "MIT", "dependencies": { - "@temporalio/common": "1.12.1", + "@temporalio/client": "1.13.2", + "@temporalio/common": "1.13.2", "abort-controller": "^3.0.0" }, "engines": { @@ -665,31 +666,32 @@ } }, "node_modules/@temporalio/client": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/client/-/client-1.12.1.tgz", - "integrity": "sha512-m89isGb6I4BBeCbhkvXbpjeRZZUa3E2R06J/I+t2JWgv0Tg+PoNPusvU9UBd6LN7f7AetsQvAZKU6eQHyWxSEA==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/client/-/client-1.13.2.tgz", + "integrity": "sha512-gyptINv/i6DTG4sRgE6S10vsO6V56iQQujDFaVIwg5pcRsRqqHIwoOldI4j1RqrEoEy7J4prRBGNwOd5H3Yf8A==", "license": "MIT", "dependencies": { "@grpc/grpc-js": "^1.12.4", - "@temporalio/common": "1.12.1", - "@temporalio/proto": "1.12.1", + "@temporalio/common": "1.13.2", + "@temporalio/proto": "1.13.2", "abort-controller": "^3.0.0", "long": "^5.2.3", - "uuid": "^9.0.1" + "uuid": "^11.1.0" }, "engines": { "node": ">= 18.0.0" } }, "node_modules/@temporalio/common": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/common/-/common-1.12.1.tgz", - "integrity": "sha512-gMVNYh49qGNFPKN22BPXtQlgvcD8rxUoP0QO0ePeaz9TyHG6+3TURGhc8xybJA7zHnpfW8TH8XHMWJIMzCPxtg==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/common/-/common-1.13.2.tgz", + "integrity": "sha512-qpp/1Bn+Uvbnew3jHL5u1YWRfBmNnklzfZwa5oOnQ5EBxKMWmpGzCtvh+VwaGXunbPHh1Teqy76Mqp/Uj2kmbA==", "license": "MIT", "dependencies": { - "@temporalio/proto": "1.12.1", + "@temporalio/proto": "1.13.2", "long": "^5.2.3", - "ms": "^3.0.0-canary.1", + "ms": "3.0.0-canary.1", + "nexus-rpc": "^0.0.1", "proto3-json-serializer": "^2.0.0" }, "engines": { @@ -697,14 +699,14 @@ } }, "node_modules/@temporalio/core-bridge": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/core-bridge/-/core-bridge-1.12.1.tgz", - "integrity": "sha512-JOLavcVhzLf4QDK7S/SAZjTbbtiYRoZoJCvJsl6T9s6MJFyeT1ih+4jeAN3UUmhLvaP++sqEuFSfRVJ0ZFoFNA==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/core-bridge/-/core-bridge-1.13.2.tgz", + "integrity": "sha512-zwYZqeWypi1YHTeoYwBYgIVmWNg4+/T+CCcOwtyNUvA25wim85p9JOCB9tKgG4e8Hu1Nptd7yEjPaZtLPmJjjg==", "hasInstallScript": true, "license": "MIT", "dependencies": { "@grpc/grpc-js": "^1.12.4", - "@temporalio/common": "1.12.1", + "@temporalio/common": "1.13.2", "arg": "^5.0.2", "cargo-cp-artifact": "^0.1.8", "which": "^4.0.0" @@ -713,10 +715,26 @@ "node": ">= 18.0.0" } }, + "node_modules/@temporalio/nexus": { + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/nexus/-/nexus-1.13.2.tgz", + "integrity": "sha512-oG+yZcgUiDCNU08aI7q5dKvRyeUtzJH7Woz66dx4QlhEIvRoUeEFqjLHySMf2r/3l1pbhZ5G2z12HcL4pVE5Eg==", + "license": "MIT", + "dependencies": { + "@temporalio/client": "1.13.2", + "@temporalio/common": "1.13.2", + "@temporalio/proto": "1.13.2", + "long": "^5.2.3", + "nexus-rpc": "^0.0.1" + }, + "engines": { + "node": ">= 18.0.0" + } + }, "node_modules/@temporalio/proto": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/proto/-/proto-1.12.1.tgz", - "integrity": "sha512-hW5jvxBuoKdh3CwbGT/AQoPMFoGG8xcPcHRMCTta/HZGFHRDibbr0aDfPS6ke7oYtcpWF0A8d6jRAHEXyPUvUQ==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/proto/-/proto-1.13.2.tgz", + "integrity": 
"sha512-V8agtFxM2KkKOtUjcCZFaIdOV64j86VrUQ4bvOZtzwmWGyp5ZCebskoaTTL8UMkRx4bTIeEKOckLrXo8VeorWg==", "license": "MIT", "dependencies": { "long": "^5.2.3", @@ -727,22 +745,25 @@ } }, "node_modules/@temporalio/worker": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/worker/-/worker-1.12.1.tgz", - "integrity": "sha512-jI3UxPAVbuM2MJO0c27iNV59KNHgAlx6yoJOpcE+jdGAmoN52MHdSt3qedrWtWINgZDbZg9dPC8KoDbXr9kP6g==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/worker/-/worker-1.13.2.tgz", + "integrity": "sha512-UEyHDjY/xJsTIg6DEwla6wncenOrmOGu13HnjwwqY2iUNJdoQUSHlqMK7Cc7hK0zpeAb7qLOCi2A1bSYVncAHg==", "license": "MIT", "dependencies": { "@grpc/grpc-js": "^1.12.4", "@swc/core": "^1.3.102", - "@temporalio/activity": "1.12.1", - "@temporalio/client": "1.12.1", - "@temporalio/common": "1.12.1", - "@temporalio/core-bridge": "1.12.1", - "@temporalio/proto": "1.12.1", - "@temporalio/workflow": "1.12.1", + "@temporalio/activity": "1.13.2", + "@temporalio/client": "1.13.2", + "@temporalio/common": "1.13.2", + "@temporalio/core-bridge": "1.13.2", + "@temporalio/nexus": "1.13.2", + "@temporalio/proto": "1.13.2", + "@temporalio/workflow": "1.13.2", "abort-controller": "^3.0.0", - "heap-js": "^2.3.0", + "heap-js": "^2.6.0", "memfs": "^4.6.0", + "nexus-rpc": "^0.0.1", + "proto3-json-serializer": "^2.0.0", "protobufjs": "^7.2.5", "rxjs": "^7.8.1", "source-map": "^0.7.4", @@ -772,13 +793,14 @@ } }, "node_modules/@temporalio/workflow": { - "version": "1.12.1", - "resolved": "https://registry.npmjs.org/@temporalio/workflow/-/workflow-1.12.1.tgz", - "integrity": "sha512-r2d2tzEf6zJENewZMku1ge53QO52ZTN8bJXp8zzerPYyMx9Iqhg3Ck1ckrdpxpDw9gxBYZsRbwS2vpiq53ZKRQ==", + "version": "1.13.2", + "resolved": "https://registry.npmjs.org/@temporalio/workflow/-/workflow-1.13.2.tgz", + "integrity": "sha512-vK8s0iCTMGNLtUZeKiFVfmLd4nVUDaJ4aS0yCy8WvMUpgqBTpaaOWPAy7KiH0grKB7zIskiWljEMtpt3ce586w==", "license": "MIT", "dependencies": { - "@temporalio/common": "1.12.1", - "@temporalio/proto": "1.12.1" + "@temporalio/common": "1.13.2", + "@temporalio/proto": "1.13.2", + "nexus-rpc": "^0.0.1" }, "engines": { "node": ">= 18.0.0" @@ -2421,9 +2443,10 @@ } }, "node_modules/heap-js": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/heap-js/-/heap-js-2.3.0.tgz", - "integrity": "sha512-E5303mzwQ+4j/n2J0rDvEPBN7GKjhis10oHiYOgjxsmxYgqG++hz9NyLLOXttzH8as/DyiBHYpUrJTZWYaMo8Q==", + "version": "2.7.1", + "resolved": "https://registry.npmjs.org/heap-js/-/heap-js-2.7.1.tgz", + "integrity": "sha512-EQfezRg0NCZGNlhlDR3Evrw1FVL2G3LhU7EgPoxufQKruNBSYA8MiRPHeWbU+36o+Fhel0wMwM+sLEiBAlNLJA==", + "license": "BSD-3-Clause", "engines": { "node": ">=10.0.0" } @@ -3028,6 +3051,15 @@ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, + "node_modules/nexus-rpc": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/nexus-rpc/-/nexus-rpc-0.0.1.tgz", + "integrity": "sha512-hAWn8Hh2eewpB5McXR5EW81R3pR/ziuGhKCF3wFyUVCklanPqrIgMNr7jKCbzXeNVad0nUDfWpFRqh2u+zxQtw==", + "license": "MIT", + "engines": { + "node": ">= 18.0.0" + } + }, "node_modules/node-releases": { "version": "2.0.19", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.19.tgz", @@ -4237,15 +4269,16 @@ "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==" }, "node_modules/uuid": { - "version": "9.0.1", - "resolved": 
"https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", - "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-11.1.0.tgz", + "integrity": "sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==", "funding": [ "https://github.com/sponsors/broofa", "https://github.com/sponsors/ctavan" ], + "license": "MIT", "bin": { - "uuid": "dist/bin/uuid" + "uuid": "dist/esm/bin/uuid" } }, "node_modules/v8-compile-cache": { diff --git a/workers/typescript/package.json b/workers/typescript/package.json index ea99d42c..4042baca 100644 --- a/workers/typescript/package.json +++ b/workers/typescript/package.json @@ -21,10 +21,10 @@ ] }, "dependencies": { - "@temporalio/activity": "^1.12.1", - "@temporalio/client": "^1.12.1", - "@temporalio/worker": "^1.12.1", - "@temporalio/workflow": "^1.12.1", + "@temporalio/activity": "^1.13.2", + "@temporalio/client": "^1.13.2", + "@temporalio/worker": "^1.13.2", + "@temporalio/workflow": "^1.13.2", "commander": "^11.1.0", "long": "^5.2.3", "winston": "^3.11.0" diff --git a/workers/typescript/src/workflows/kitchen_sink.ts b/workers/typescript/src/workflows/kitchen_sink.ts index e55c6411..26533705 100644 --- a/workers/typescript/src/workflows/kitchen_sink.ts +++ b/workers/typescript/src/workflows/kitchen_sink.ts @@ -290,21 +290,41 @@ function launchActivity(execActivity: IExecuteActivityAction): Promise actType = 'client'; args.push(execActivity.client); } - const actArgs: ActivityOptions | LocalActivityOptions = { - scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), - startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), - scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), - retry: decompileRetryPolicy(execActivity.retryPolicy), - priority: decodePriority(execActivity.priority), - }; - if (execActivity.isLocal) { - return scheduleLocalActivity(actType, args, actArgs); + const localArgs: LocalActivityOptions = { + scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), + startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), + scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), + retry: decompileRetryPolicy(execActivity.retryPolicy), + }; + return scheduleLocalActivity(actType, args, localArgs); } else { - const remoteArgs = actArgs as ActivityOptions; - remoteArgs.taskQueue = execActivity.taskQueue ?? undefined; - remoteArgs.cancellationType = convertCancelType(execActivity.remote?.cancellationType); - remoteArgs.heartbeatTimeout = durationConvert(execActivity.heartbeatTimeout); + // Build priority object with fairness key and weight + let priority = decodePriority(execActivity.priority); + if ( + execActivity.fairnessKey || + (execActivity.fairnessWeight && execActivity.fairnessWeight > 0) + ) { + priority = { + ...priority, + fairnessKey: execActivity.fairnessKey || undefined, + fairnessWeight: + execActivity.fairnessWeight && execActivity.fairnessWeight > 0 + ? 
execActivity.fairnessWeight + : undefined, + }; + } + + const remoteArgs: ActivityOptions = { + scheduleToCloseTimeout: durationConvertMaybeUndefined(execActivity.scheduleToCloseTimeout), + startToCloseTimeout: durationConvertMaybeUndefined(execActivity.startToCloseTimeout), + scheduleToStartTimeout: durationConvertMaybeUndefined(execActivity.scheduleToStartTimeout), + retry: decompileRetryPolicy(execActivity.retryPolicy), + priority, + taskQueue: execActivity.taskQueue ?? undefined, + cancellationType: convertCancelType(execActivity.remote?.cancellationType), + heartbeatTimeout: durationConvert(execActivity.heartbeatTimeout), + }; return scheduleActivity(actType, args, remoteArgs); } }