From 9219cf4757c37ca53e51e346de3c7949040c8d7c Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Tue, 12 May 2026 12:19:35 +0200 Subject: [PATCH 01/33] init runner-org-sync service --- src/runner-org-sync/.dockerignore | 15 + src/runner-org-sync/.gitignore | 6 + src/runner-org-sync/.golangci.yml | 40 ++ src/runner-org-sync/Dockerfile | 36 ++ src/runner-org-sync/Makefile | 33 ++ src/runner-org-sync/README.md | 222 ++++++++ .../cmd/runner-org-sync/main.go | 230 +++++++++ src/runner-org-sync/go.mod | 75 +++ src/runner-org-sync/go.sum | 183 +++++++ .../infra/kustomize/cronjob.yaml | 91 ++++ .../infra/kustomize/kustomization.yaml | 24 + src/runner-org-sync/infra/kustomize/role.yaml | 17 + .../infra/kustomize/rolebinding.yaml | 11 + .../infra/kustomize/serviceaccount.yaml | 9 + src/runner-org-sync/internal/cdn/cdn.go | 119 +++++ src/runner-org-sync/internal/cdn/cdn_test.go | 194 +++++++ src/runner-org-sync/internal/config/config.go | 147 ++++++ .../internal/config/config_test.go | 185 +++++++ src/runner-org-sync/internal/gitea/gitea.go | 126 +++++ .../internal/gitea/gitea_test.go | 154 ++++++ .../internal/k8sstate/k8sstate.go | 164 ++++++ .../internal/k8sstate/k8sstate_test.go | 210 ++++++++ .../internal/keyvault/keyvault.go | 113 +++++ .../internal/keyvault/keyvault_test.go | 99 ++++ .../internal/reconcile/reconcile.go | 296 +++++++++++ .../internal/reconcile/reconcile_test.go | 476 ++++++++++++++++++ .../internal/telemetry/metrics.go | 108 ++++ .../internal/telemetry/telemetry.go | 131 +++++ 28 files changed, 3514 insertions(+) create mode 100644 src/runner-org-sync/.dockerignore create mode 100644 src/runner-org-sync/.gitignore create mode 100644 src/runner-org-sync/.golangci.yml create mode 100644 src/runner-org-sync/Dockerfile create mode 100644 src/runner-org-sync/Makefile create mode 100644 src/runner-org-sync/README.md create mode 100644 src/runner-org-sync/cmd/runner-org-sync/main.go create mode 100644 
src/runner-org-sync/go.mod create mode 100644 src/runner-org-sync/go.sum create mode 100644 src/runner-org-sync/infra/kustomize/cronjob.yaml create mode 100644 src/runner-org-sync/infra/kustomize/kustomization.yaml create mode 100644 src/runner-org-sync/infra/kustomize/role.yaml create mode 100644 src/runner-org-sync/infra/kustomize/rolebinding.yaml create mode 100644 src/runner-org-sync/infra/kustomize/serviceaccount.yaml create mode 100644 src/runner-org-sync/internal/cdn/cdn.go create mode 100644 src/runner-org-sync/internal/cdn/cdn_test.go create mode 100644 src/runner-org-sync/internal/config/config.go create mode 100644 src/runner-org-sync/internal/config/config_test.go create mode 100644 src/runner-org-sync/internal/gitea/gitea.go create mode 100644 src/runner-org-sync/internal/gitea/gitea_test.go create mode 100644 src/runner-org-sync/internal/k8sstate/k8sstate.go create mode 100644 src/runner-org-sync/internal/k8sstate/k8sstate_test.go create mode 100644 src/runner-org-sync/internal/keyvault/keyvault.go create mode 100644 src/runner-org-sync/internal/keyvault/keyvault_test.go create mode 100644 src/runner-org-sync/internal/reconcile/reconcile.go create mode 100644 src/runner-org-sync/internal/reconcile/reconcile_test.go create mode 100644 src/runner-org-sync/internal/telemetry/metrics.go create mode 100644 src/runner-org-sync/internal/telemetry/telemetry.go diff --git a/src/runner-org-sync/.dockerignore b/src/runner-org-sync/.dockerignore new file mode 100644 index 00000000000..dcfabf0e3b1 --- /dev/null +++ b/src/runner-org-sync/.dockerignore @@ -0,0 +1,15 @@ +bin/ +dist/ +coverage.out +coverage.html +*.test +.git/ +.gitignore +.golangci.yml +Makefile +README.md +infra/ +test/ +**/testdata/ +**/*_test.go +.DS_Store diff --git a/src/runner-org-sync/.gitignore b/src/runner-org-sync/.gitignore new file mode 100644 index 00000000000..e874f071658 --- /dev/null +++ b/src/runner-org-sync/.gitignore @@ -0,0 +1,6 @@ +bin/ +dist/ +coverage.out +coverage.html +*.test 
+.DS_Store diff --git a/src/runner-org-sync/.golangci.yml b/src/runner-org-sync/.golangci.yml new file mode 100644 index 00000000000..7ce3d3492db --- /dev/null +++ b/src/runner-org-sync/.golangci.yml @@ -0,0 +1,40 @@ +run: + timeout: 5m + tests: true + +linters: + default: none + enable: + - errcheck + - errorlint + - gocritic + - gosec + - govet + - ineffassign + - revive + - staticcheck + - unused + +linters-settings: + errorlint: + errorf: true + asserts: true + comparison: true + revive: + severity: warning + rules: + - name: exported + - name: var-naming + - name: error-return + - name: error-naming + - name: error-strings + - name: unused-parameter + - name: receiver-naming + - name: indent-error-flow + - name: package-comments + +issues: + exclude-rules: + - path: _test\.go + linters: + - gosec diff --git a/src/runner-org-sync/Dockerfile b/src/runner-org-sync/Dockerfile new file mode 100644 index 00000000000..eb50ed9771a --- /dev/null +++ b/src/runner-org-sync/Dockerfile @@ -0,0 +1,36 @@ +# syntax=docker/dockerfile:1.7 + +ARG GO_VERSION=1.26 + +# --- build ------------------------------------------------------------------ +FROM golang:${GO_VERSION}-alpine AS build + +WORKDIR /src + +# Module cache: download deps first so source changes don't re-download. +COPY go.mod go.sum ./ +RUN --mount=type=cache,target=/go/pkg/mod \ + go mod download + +COPY . . + +# Static binary so we can run on distroless/static without a libc. +# -trimpath strips local paths from stack traces, -s -w trims symbol tables. +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + CGO_ENABLED=0 GOOS=linux go build \ + -trimpath \ + -ldflags="-s -w" \ + -o /out/runner-org-sync \ + ./cmd/runner-org-sync + +# --- runtime ---------------------------------------------------------------- +# Distroless static (nonroot variant): no shell, no package manager, runs as +# uid 65532. 
ca-certificates are bundled, so TLS to Azure KV and the CDN +# works without extra setup. +FROM gcr.io/distroless/static-debian12:nonroot + +COPY --from=build /out/runner-org-sync /runner-org-sync + +USER 65532:65532 +ENTRYPOINT ["/runner-org-sync"] diff --git a/src/runner-org-sync/Makefile b/src/runner-org-sync/Makefile new file mode 100644 index 00000000000..33c7d71b52f --- /dev/null +++ b/src/runner-org-sync/Makefile @@ -0,0 +1,33 @@ +SHELL := /bin/bash +BINARY := bin/runner-org-sync +PKG := ./... + +.PHONY: build +build: + @mkdir -p bin + go build -trimpath -ldflags="-s -w" -o $(BINARY) ./cmd/runner-org-sync + +.PHONY: test +test: + go test -race -count=1 $(PKG) + +.PHONY: test-cover +test-cover: + go test -race -count=1 -coverprofile=coverage.out $(PKG) + go tool cover -html=coverage.out -o coverage.html + +.PHONY: lint +lint: + golangci-lint run + +.PHONY: tidy +tidy: + go mod tidy + +.PHONY: vet +vet: + go vet $(PKG) + +.PHONY: clean +clean: + rm -rf bin coverage.out coverage.html diff --git a/src/runner-org-sync/README.md b/src/runner-org-sync/README.md new file mode 100644 index 00000000000..5ee34ce7bcf --- /dev/null +++ b/src/runner-org-sync/README.md @@ -0,0 +1,222 @@ +# runner-org-sync + +A small, idempotent Kubernetes CronJob that bridges the Altinn organisation +list (published on the public CDN) to the per-organisation Gitea Actions +runners running in the Studio cluster. + +## What it does + +Every 15 minutes it: + +1. Loads the Gitea admin PAT from Azure Key Vault (via Workload Identity), or + from a local env var override for development. +2. Fetches `altinn-orgs.json` from `https://altinncdn.no/orgs/altinn-orgs.json`. +3. Filters orgs to those with at least one declared `environments` entry, + then intersects with a whitelist supplied via env var. +4. 
For each org in the desired set: + - if a `Secret altinn-gitea-runner-{org}-secret` already exists, leaves it + alone — registered tokens are preserved across reconciles, + - otherwise mints a fresh registration token via Gitea's admin API and + creates the Secret. +5. Deletes Secrets for orgs that are no longer in the desired set. +6. Writes a single `ConfigMap/runner-org-list` projecting the desired set; + the `gitea-org-runner-config` HelmRelease picks this up via Flux + `valuesFrom` and renders one runner Deployment per entry. + +Continue-on-partial-failure: a single org failing to mint does not abort the +run. Failed orgs are simply omitted from this tick's ConfigMap and retried +on the next; failures surface through metrics (`runner_org_sync.org.reconcile_errors`) +rather than CronJob exit codes. + +## Architecture + +``` + Azure Key Vault + │ + │ Workload Identity + ▼ + altinncdn.no ──► runner-org-sync (CronJob /15min) + altinn-orgs.json │ + │ filter: environments != ∅ ∧ whitelist + ▼ + ┌───────────────────────────────────────────┐ + │ studio-runners ns │ + │ │ + │ per-org Secrets ConfigMap │ + │ ┌──────────────┐ ┌─────────────────┐ │ + │ │ ttd-secret │ │ runner-org-list │ │ + │ │ brg-secret │ │ - ttd │ │ + │ │ dsb-secret │ │ - brg │ │ + │ │ ... 
│ │ - dsb │ │ + │ └──────┬───────┘ └────────┬────────┘ │ + │ │ │ │ + └──────────┼───────────────────┼────────────┘ + │ │ valuesFrom + │ ▼ + │ ┌───────────────────────────────┐ + │ │ gitea-org-runner-config │ + │ │ HelmRelease (Flux) │ + │ │ │ + │ │ renders one Deployment │ + │ │ per org-in-ConfigMap │ + │ └────────────┬──────────────────┘ + │ │ + │ secretKeyRef │ + ▼ ▼ + ┌───────────────────────────────────────────┐ + │ Gitea Actions runner Deployments │ + │ one per org, each ephemeral │ + └───────────────────────────────────────────┘ + │ + ▼ + OTel collector + (traces + metrics + logs at + otel-router.observability:4317) +``` + +Two distinct credentials live in distinct stores: + +| Credential | Sensitivity | Storage | +| ------------------------------------------ | ------------------------- | ------------------------------------------------------------------------------------------------------------- | +| Gitea admin PAT (mints tokens for any org) | High | Azure Key Vault, fetched at pod start via Workload Identity. Never persisted in K8s. | +| Per-org runner registration token | Lower (scoped to one org) | K8s Secret `altinn-gitea-runner-{org}-secret`, key `token`. Consumed by runner Deployment via `secretKeyRef`. | + +## Configuration + +All settings come from environment variables. The loader fails fast at +startup and aggregates every validation problem into one error. + +| Variable | Required | Purpose | +| -------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------- | +| `RUNNER_ORG_SYNC_GITEA_URL` | yes | Base URL for Gitea admin API | +| `RUNNER_ORG_SYNC_ORGS_JSON_URL` | yes | URL of `altinn-orgs.json` | +| `RUNNER_ORG_SYNC_OUTPUT_NAMESPACE` | yes | Target namespace (e.g. `studio-runners`) | +| `RUNNER_ORG_SYNC_SECRET_NAME_PATTERN` | yes | Must contain the `{org}` placeholder, e.g. 
`altinn-gitea-runner-{org}-secret` | +| `RUNNER_ORG_SYNC_CONFIGMAP_NAME` | yes | e.g. `runner-org-list` | +| `RUNNER_ORG_SYNC_KEYVAULT_NAME` | if no env PAT | Azure Key Vault name | +| `RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME` | if no env PAT | Secret name inside the vault | +| `RUNNER_ORG_SYNC_SYNC_ALL` | no | `true` to skip the whitelist filter | +| `RUNNER_ORG_SYNC_ORGS` | if `SYNC_ALL=false` | CSV whitelist, e.g. `ttd,brg,dsb` | +| `RUNNER_ORG_SYNC_GITEA_PAT` | no | Local-dev bypass for Key Vault. Source is logged at startup. | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | no | OTel collector endpoint (defaults via SDK) | +| `OTEL_SERVICE_NAME` | no | Defaults to `runner-org-sync` | +| `OTEL_RESOURCE_ATTRIBUTES` | no | e.g. `deployment.environment=dev` | +| `AZURE_*` | injected | Workload Identity webhook fills `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_FEDERATED_TOKEN_FILE` | + +## Reconcile algorithm + +``` +desired = (orgs with non-empty environments) ∩ whitelist (or all if syncAll=true) +existing_secrets = Secrets matching SECRET_NAME_PATTERN + +for org in desired: + if Secret exists for org: + skip (preserve registered token) + else: + mint registration token via Gitea + create Secret + +for secret in existing_secrets: + if secret.org not in desired: + delete Secret + +apply ConfigMap with one entry per (desired ∩ orgs whose Secret now exists) +``` + +Existing Secrets are never re-minted; this preserves any in-flight runner +registrations and avoids churn on Deployments that already work. Deletions +remove only the K8s Secret; orphaned Gitea-side runner records are left to +go idle (cleanup is a separate concern). + +## Observability + +This service emits OpenTelemetry traces and metrics; logs are kept thin and +intended for `kubectl logs` triage only. + +**Traces** — one root span per reconcile run, with per-stage children. Per-org +work surfaces as span events on the parent span (`org.token.minted`, +`org.secret.created`, `org.secret.deleted`, `org.skipped`). 
+ +**Metrics** — see the package source for the canonical list. Highlights: + +- `runner_org_sync.reconcile.duration` (histogram, by outcome) +- `runner_org_sync.secrets.created` / `.deleted` / `.skipped` (counters) +- `runner_org_sync.org.reconcile_errors` (counter, by org and stage) — + **the signal worth paging on if sustained non-zero** +- `runner_org_sync.{gitea,cdn,keyvault}.call.duration` (histograms) + +**Logs** — JSON via `slog`, ~5 lines per healthy run plus any WARNs: + +``` +pat.loaded source=keyvault len=40 +reconcile.start run_id=... +orgs.kept count=9 orgs=[ttd,brg,dsb,...] +org.reconcile.failed org=dsb stage=mint err=... (WARN, only on failure) +reconcile.end duration_ms=... outcome=success|partial|failure +``` + +## Local development + +The Gitea PAT can come from an env var instead of Key Vault, sidestepping +the need for Azure auth on a laptop: + +```sh +export RUNNER_ORG_SYNC_GITEA_PAT='your-local-or-test-pat' +export RUNNER_ORG_SYNC_GITEA_URL='http://localhost:3000' +export RUNNER_ORG_SYNC_ORGS_JSON_URL='https://altinncdn.no/orgs/altinn-orgs.json' +export RUNNER_ORG_SYNC_OUTPUT_NAMESPACE='studio-runners' +export RUNNER_ORG_SYNC_SECRET_NAME_PATTERN='altinn-gitea-runner-{org}-secret' +export RUNNER_ORG_SYNC_CONFIGMAP_NAME='runner-org-list' +export RUNNER_ORG_SYNC_ORGS='ttd,brg' + +go run ./cmd/runner-org-sync +``` + +The first log line will read `pat.loaded source=env`, making any accidental +fallback in a non-local environment immediately visible. + +## Testing + +```sh +make test # unit tests with race detector +make test-cover # coverage report at coverage.html +make lint # golangci-lint +``` + +Unit tests use stdlib `testing`, `net/http/httptest`, and +`k8s.io/client-go/kubernetes/fake`. No testify, no other test frameworks. + +Integration tests (kind-based) live under `test/integration/` and use stub +CDN + stub Gitea services in-cluster. They cover seven scenarios: + +1. Cold start +2. Idempotent re-run (no writes on unchanged input) +3. 
Org added +4. Org removed +5. Org with empty `environments` (filtered out) +6. Whitelist excludes +7. Gitea partial failure (one org fails, others succeed) + +Workload Identity is Azure-specific and is not covered by kind tests; +verify that path manually in a dev cluster. + +## Project layout + +``` +. +├── cmd/runner-org-sync/ entry point +├── internal/ +│ ├── config/ env-var loading + validation +│ ├── cdn/ altinn-orgs.json fetch + decode +│ ├── gitea/ registration-token mint client +│ ├── keyvault/ PAT loader (env override → Key Vault) +│ ├── k8sstate/ Secret + ConfigMap reconcile primitives +│ ├── reconcile/ pure orchestration +│ └── telemetry/ OTel + slog wiring +├── test/integration/ kind harness + scenarios +├── infra/kustomize/ Kubernetes manifests (Flux post-build substitution) +├── Dockerfile +├── Makefile +├── go.mod, go.sum +└── README.md +``` diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go new file mode 100644 index 00000000000..d7bf90e68a9 --- /dev/null +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -0,0 +1,230 @@ +// Command runner-org-sync runs one reconcile cycle: discover orgs from the +// Altinn CDN, mint missing per-org Gitea runner registration tokens, delete +// Secrets for orgs that fell out of the desired set, and project the runners +// ConfigMap. It is designed to run as a Kubernetes CronJob; each invocation +// is a fresh pod that reads what it needs, performs the work, and exits. 
+package main + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "os/signal" + "syscall" + "time" + + "github.com/google/uuid" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + + "altinn.studio/runner-org-sync/internal/cdn" + "altinn.studio/runner-org-sync/internal/config" + "altinn.studio/runner-org-sync/internal/gitea" + "altinn.studio/runner-org-sync/internal/k8sstate" + "altinn.studio/runner-org-sync/internal/keyvault" + "altinn.studio/runner-org-sync/internal/reconcile" + "altinn.studio/runner-org-sync/internal/telemetry" +) + +const ( + telemetryShutdownTimeout = 10 * time.Second + serviceName = "runner-org-sync" +) + +func main() { + if err := run(); err != nil { + slog.Error("fatal", "err", err.Error()) + os.Exit(1) + } +} + +func run() error { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + + cfg, err := config.Load() + if err != nil { + return fmt.Errorf("config: %w", err) + } + + tel, shutdown, err := telemetry.Init(ctx, serviceName) + if err != nil { + return fmt.Errorf("telemetry init: %w", err) + } + defer func() { + sctx, scancel := context.WithTimeout(context.Background(), telemetryShutdownTimeout) + defer scancel() + if err := shutdown(sctx); err != nil { + slog.Warn("telemetry shutdown returned error", "err", err.Error()) + } + }() + + metrics, err := telemetry.NewMetrics(tel.Meter) + if err != nil { + return fmt.Errorf("telemetry metrics: %w", err) + } + + runID := uuid.NewString() + logger := tel.Logger.With("run_id", runID, "service", serviceName) + + pat, patSource, err := loadPAT(ctx, cfg) + if err != nil { + return fmt.Errorf("load PAT: %w", err) + } + logger.Info("pat.loaded", "source", string(patSource), "len", len(pat)) + + k8sClient, err := buildK8sClient() + if err != nil { 
+ return fmt.Errorf("build kubernetes client: %w", err) + } + store := k8sstate.NewStore(k8sClient, cfg.OutputNamespace) + giteaClient := gitea.NewClient(cfg.GiteaURL, pat) + cdnClient := cdn.NewClient(cfg.OrgsJSONURL) + + rec, err := reconcile.New(reconcile.Options{ + Source: cdnClient, + Minter: giteaClient, + Store: store, + SecretNameFor: cfg.SecretNameFor, + ConfigMapName: cfg.ConfigMapName, + Whitelist: cfg.WhitelistedOrgs, + SyncAll: cfg.SyncAll, + }) + if err != nil { + return fmt.Errorf("build reconciler: %w", err) + } + + ctx, span := tel.Tracer.Start(ctx, "runner_org_sync.reconcile", + trace.WithAttributes(attribute.String("run_id", runID)), + ) + defer span.End() + + logger.Info("reconcile.start") + start := time.Now() + report, runErr := rec.Run(ctx) + duration := time.Since(start) + + emitMetrics(ctx, metrics, report, duration) + addSpanEvents(span, report) + + if len(report.Desired) > 0 { + logger.Info("orgs.kept", "count", len(report.Desired), "orgs", report.Desired) + } + for _, f := range report.FailedOrgs { + logger.Warn("org.reconcile.failed", + "org", f.Org, "stage", f.Stage, "err", f.Err.Error()) + } + + logger.Info("reconcile.end", + "duration_ms", duration.Milliseconds(), + "outcome", string(report.Outcome), + "discovered", report.Discovered, + "desired", len(report.Desired), + "created", len(report.SecretsCreated), + "deleted", len(report.SecretsDeleted), + "skipped", len(report.SecretsSkipped), + "failed", len(report.FailedOrgs), + "configmap_changed", report.ConfigMapChanged, + ) + + if runErr != nil { + span.RecordError(runErr) + span.SetStatus(codes.Error, runErr.Error()) + return runErr + } + if report.Outcome == reconcile.OutcomePartial { + // Continue-on-partial: still exit 0; metric + WARN log carries the signal. + span.SetStatus(codes.Ok, "partial") + } else { + span.SetStatus(codes.Ok, "success") + } + return nil +} + +// loadPAT resolves the Gitea admin PAT, honouring the env-var override for +// local development. 
In-cluster it goes through Azure Key Vault using +// Workload Identity via DefaultAzureCredential. +func loadPAT(ctx context.Context, cfg config.Config) (string, keyvault.Source, error) { + var getter keyvault.Getter + if cfg.GiteaPATOverride == "" { + g, err := keyvault.NewAzureGetter(cfg.KeyVaultName) + if err != nil { + return "", "", fmt.Errorf("build keyvault getter: %w", err) + } + getter = g + } + loader := keyvault.NewLoader(cfg.GiteaPATOverride, getter, cfg.KeyVaultSecretName) + return loader.Load(ctx) +} + +// buildK8sClient returns a clientset that prefers in-cluster config and +// falls back to a local kubeconfig (KUBECONFIG / $HOME/.kube/config) so a +// developer can run the binary directly against a kind cluster. +func buildK8sClient() (kubernetes.Interface, error) { + if cfg, err := rest.InClusterConfig(); err == nil { + return kubernetes.NewForConfig(cfg) + } else if !errors.Is(err, rest.ErrNotInCluster) { + return nil, fmt.Errorf("in-cluster config: %w", err) + } + loading := clientcmd.NewDefaultClientConfigLoadingRules() + kubeCfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loading, &clientcmd.ConfigOverrides{}).ClientConfig() + if err != nil { + return nil, fmt.Errorf("local kubeconfig: %w", err) + } + return kubernetes.NewForConfig(kubeCfg) +} + +func emitMetrics(ctx context.Context, m *telemetry.Metrics, r reconcile.Report, d time.Duration) { + outcomeAttr := attribute.String("outcome", string(r.Outcome)) + m.ReconcileDuration.Record(ctx, d.Seconds(), metric.WithAttributes(outcomeAttr)) + m.ReconcileRuns.Add(ctx, 1, metric.WithAttributes(outcomeAttr)) + m.OrgsDiscovered.Record(ctx, int64(r.Discovered)) + m.OrgsDesired.Record(ctx, int64(len(r.Desired))) + m.RecordFiltered(ctx, reconcile.FilterReasonNoEnv, len(r.FilteredNoEnv)) + m.RecordFiltered(ctx, reconcile.FilterReasonWhitelist, len(r.FilteredWhitelist)) + + for _, org := range r.SecretsCreated { + m.SecretsCreated.Add(ctx, 1, metric.WithAttributes(attribute.String("org", 
org))) + } + for _, org := range r.SecretsDeleted { + m.SecretsDeleted.Add(ctx, 1, metric.WithAttributes(attribute.String("org", org))) + } + m.SecretsSkipped.Add(ctx, int64(len(r.SecretsSkipped))) + + for _, f := range r.FailedOrgs { + m.OrgReconcileErrors.Add(ctx, 1, metric.WithAttributes( + attribute.String("org", f.Org), + attribute.String("stage", f.Stage), + )) + } + + m.ConfigMapApplied.Add(ctx, 1, metric.WithAttributes( + attribute.Bool("changed", r.ConfigMapChanged), + )) +} + +func addSpanEvents(span trace.Span, r reconcile.Report) { + for _, org := range r.SecretsCreated { + span.AddEvent("org.secret.created", trace.WithAttributes(attribute.String("org", org))) + } + for _, org := range r.SecretsDeleted { + span.AddEvent("org.secret.deleted", trace.WithAttributes(attribute.String("org", org))) + } + for _, org := range r.SecretsSkipped { + span.AddEvent("org.skipped", trace.WithAttributes(attribute.String("org", org))) + } + for _, f := range r.FailedOrgs { + span.AddEvent("org.reconcile.failed", trace.WithAttributes( + attribute.String("org", f.Org), + attribute.String("stage", f.Stage), + attribute.String("err", f.Err.Error()), + )) + } +} diff --git a/src/runner-org-sync/go.mod b/src/runner-org-sync/go.mod new file mode 100644 index 00000000000..6af881d65e1 --- /dev/null +++ b/src/runner-org-sync/go.mod @@ -0,0 +1,75 @@ +module altinn.studio/runner-org-sync + +go 1.26.0 + +require ( + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0 + github.com/google/uuid v1.6.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 + go.opentelemetry.io/otel/metric v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/sdk/metric v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + k8s.io/api v0.36.0 + k8s.io/apimachinery v0.36.0 + 
k8s.io/client-go v0.36.0 +) + +require ( + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/golang-jwt/jwt/v5 v5.3.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect + github.com/spf13/pflag v1.0.9 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/net v0.52.0 // indirect + 
golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/term v0.41.0 // indirect + golang.org/x/text v0.35.0 // indirect + golang.org/x/time v0.14.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect + google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.140.0 // indirect + k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a // indirect + k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.2 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) diff --git a/src/runner-org-sync/go.sum b/src/runner-org-sync/go.sum new file mode 100644 index 00000000000..c13d2c1cea7 --- /dev/null +++ b/src/runner-org-sync/go.sum @@ -0,0 +1,183 @@ +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA= 
+github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0 h1:/g8S6wk65vfC6m3FIxJ+i5QDyN9JWwXI8Hb0Img10hU= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0/go.mod h1:gpl+q95AzZlKVI3xSoseF9QPrypk0hQqBiJYeB/cR/I= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 h1:nCYfgcSyHZXJI8J0IWE5MsCGlb2xp9fJiXyxWgmOFg4= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0/go.mod h1:ucUjca2JtSZboY8IoUqyQyuuXvwbMBVwFOm0vdQPNhA= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 
v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod 
h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU= +github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.7.7 
h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx 
v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 h1:8UQVDcZxOJLtX6gxtDt3vY2WTgvZqMQRzjsqiIHQdkc= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0/go.mod h1:2lmweYCiHYpEjQ/lSJBYhj9jP1zvCvQW4BqL9dnT7FQ= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= 
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/oauth2 v0.35.0 
h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af h1:+5/Sw3GsDNlEmu7TfklWKPdQ0Ykja5VEmq2i817+jbI= +google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.36.0 h1:SgqDhZzHdOtMk40xVSvCXkP9ME0H05hPM3p9AB1kL80= +k8s.io/api v0.36.0/go.mod h1:m1LVrGPNYax5NBHdO+QuAedXyuzTt4RryI/qnmNvs34= +k8s.io/apimachinery v0.36.0 h1:jZyPzhd5Z+3h9vJLt0z9XdzW9VzNzWAUw+P1xZ9PXtQ= +k8s.io/apimachinery v0.36.0/go.mod h1:FklypaRJt6n5wUIwWXIP6GJlIpUizTgfo1T/As+Tyxc= +k8s.io/client-go v0.36.0 h1:pOYi7C4RHChYjMiHpZSpSbIM6ZxVbRXBy7CuiIwqA3c= +k8s.io/client-go v0.36.0/go.mod h1:ZKKcpwF0aLYfkHFCjillCKaTK/yBkEDHTDXCFY6AS9Y= +k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc= +k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0= +k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a h1:xCeOEAOoGYl2jnJoHkC3hkbPJgdATINPMAxaynU2Ovg= +k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a/go.mod h1:uGBT7iTA6c6MvqUvSXIaYZo9ukscABYi2btjhvgKGZ0= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 h1:AZYQSJemyQB5eRxqcPky+/7EdBj0xi3g0ZcxxJ7vbWU= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= 
+sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2 h1:kwVWMx5yS1CrnFWA/2QHyRVJ8jM6dBA80uLmm0wJkk8= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/src/runner-org-sync/infra/kustomize/cronjob.yaml b/src/runner-org-sync/infra/kustomize/cronjob.yaml new file mode 100644 index 00000000000..08c9b3711ca --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/cronjob.yaml @@ -0,0 +1,91 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync + labels: + app: runner-org-sync + annotations: + altinn.studio/image: "altinn-studio/runner-org-sync:latest" + altinn.studio/image-tag: "latest" +spec: + # Every 15 minutes, aligned to wall-clock quarters. + schedule: "*/15 * * * *" + + # Avoid overlap. A reconcile takes seconds; if a previous run somehow is + # still going, skipping is correct — the next tick has the latest state. + concurrencyPolicy: Forbid + + # If the controller misses a window (e.g. AKS upgrade), do not back-fill. + # The next scheduled run does the right thing on its own. + startingDeadlineSeconds: 60 + + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + + jobTemplate: + spec: + backoffLimit: 0 + # Job-level deadline so a wedged pod cannot pile up next to a fresh one. + activeDeadlineSeconds: 300 + template: + metadata: + labels: + app: runner-org-sync + # Tells the Workload Identity webhook to inject the federated + # token volume and AZURE_* env vars into this pod. 
+ azure.workload.identity/use: "true" + spec: + serviceAccountName: runner-org-sync + restartPolicy: Never + terminationGracePeriodSeconds: 30 + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault + containers: + - name: runner-org-sync + image: altinn-studio/runner-org-sync:latest + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + privileged: false + capabilities: + drop: ["ALL"] + env: + - name: RUNNER_ORG_SYNC_GITEA_URL + value: "http://altinn-repositories-public.default.svc.cluster.local" + - name: RUNNER_ORG_SYNC_ORGS_JSON_URL + value: "https://altinncdn.no/orgs/altinn-orgs.json" + - name: RUNNER_ORG_SYNC_OUTPUT_NAMESPACE + value: "studio-runners" + - name: RUNNER_ORG_SYNC_SECRET_NAME_PATTERN + value: "altinn-gitea-runner-{org}-secret" + - name: RUNNER_ORG_SYNC_CONFIGMAP_NAME + value: "runner-org-list" + - name: RUNNER_ORG_SYNC_SYNC_ALL + value: "false" + - name: RUNNER_ORG_SYNC_ORGS + value: "${RUNNER_ORG_SYNC_ORGS}" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "${RUNNER_ORG_SYNC_KEYVAULT_NAME}" + - name: RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME + value: "gitea-admin-pat" + - name: OTEL_SERVICE_NAME + value: "runner-org-sync" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-router.observability.svc.cluster.local:4317" + - name: OTEL_EXPORTER_OTLP_PROTOCOL + value: "grpc" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=${ENVIRONMENT}" + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 500m + memory: 256Mi diff --git a/src/runner-org-sync/infra/kustomize/kustomization.yaml b/src/runner-org-sync/infra/kustomize/kustomization.yaml new file mode 100644 index 00000000000..ca60aab1837 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/kustomization.yaml @@ -0,0 +1,24 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization 
+ +namespace: studio-runners + +resources: + - serviceaccount.yaml + - role.yaml + - rolebinding.yaml + - cronjob.yaml + +# Copy the image annotation onto the container spec. The annotation value +# is itself substituted at deploy time by Flux post-build substitution. +replacements: + - source: + kind: CronJob + name: runner-org-sync + fieldPath: metadata.annotations.[altinn.studio/image] + targets: + - select: + kind: CronJob + name: runner-org-sync + fieldPaths: + - spec.jobTemplate.spec.template.spec.containers.[name=runner-org-sync].image diff --git a/src/runner-org-sync/infra/kustomize/role.yaml b/src/runner-org-sync/infra/kustomize/role.yaml new file mode 100644 index 00000000000..f03fbdeb99f --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: runner-org-sync +rules: + # Per-org registration-token Secrets: list to inventory, get to check + # existence, create on onboarding, delete on offboarding. No "update" — + # we never modify a Secret in place; we delete and recreate. + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "create", "delete"] + + # Runner-org-list ConfigMap: get to detect drift, create on first run, + # update on subsequent changes. 
+ - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "create", "update"] diff --git a/src/runner-org-sync/infra/kustomize/rolebinding.yaml b/src/runner-org-sync/infra/kustomize/rolebinding.yaml new file mode 100644 index 00000000000..ef1d9999b45 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/rolebinding.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: runner-org-sync +subjects: + - kind: ServiceAccount + name: runner-org-sync +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: runner-org-sync diff --git a/src/runner-org-sync/infra/kustomize/serviceaccount.yaml b/src/runner-org-sync/infra/kustomize/serviceaccount.yaml new file mode 100644 index 00000000000..1f6c27d7416 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/serviceaccount.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + # Keep this name stable: the Azure Entra federated credential subject is + # bound to ServiceAccount name + namespace. Renaming this SA invalidates + # Workload Identity until the federated credential is updated to match. + name: runner-org-sync + annotations: + azure.workload.identity/client-id: "${RUNNER_ORG_SYNC_ENTRA_CLIENT_ID}" diff --git a/src/runner-org-sync/internal/cdn/cdn.go b/src/runner-org-sync/internal/cdn/cdn.go new file mode 100644 index 00000000000..58514e36f92 --- /dev/null +++ b/src/runner-org-sync/internal/cdn/cdn.go @@ -0,0 +1,119 @@ +// Package cdn fetches and decodes the Altinn organisations document from +// https://altinncdn.no/orgs/altinn-orgs.json (or an equivalent test URL). +// +// The CDN document is a single JSON object whose top-level "orgs" key maps +// short organisation codes (e.g. "ttd", "brg") to per-org metadata. Only +// fields used downstream are decoded; the rest are silently ignored. 
const (
	// defaultTimeout is the http.Client timeout used when the caller does not
	// supply its own client through WithHTTPClient.
	defaultTimeout = 30 * time.Second
	// defaultUserAgent is sent as User-Agent unless WithUserAgent overrides it.
	defaultUserAgent = "runner-org-sync"
	// maxErrorBody caps how many bytes of a rejected response body are quoted
	// in the error returned by Fetch.
	maxErrorBody = 512
)

// ErrUnexpectedStatus is returned (wrapped) when the CDN responds with any
// status other than 200 OK. Match it with errors.Is.
var ErrUnexpectedStatus = errors.New("cdn: unexpected status")

// Org is one entry from altinn-orgs.json. Code is the map key from the
// document (populated by Fetch, not by the JSON decoder).
type Org struct {
	Code         string            `json:"-"`
	Name         map[string]string `json:"name"`
	Orgnr        string            `json:"orgnr"`
	Environments []string          `json:"environments"`
}

// DisplayName returns the most useful human-readable name available: the
// non-empty English ("en") name if present, otherwise the non-empty name with
// the lexicographically smallest language key, otherwise the org code.
//
// The fallback is chosen by key rather than by plain map iteration because Go
// randomizes map iteration order; without that the same Org could yield a
// different name on every call.
func (o Org) DisplayName() string {
	if v := o.Name["en"]; v != "" {
		return v
	}

	lang, found := "", false
	for k, v := range o.Name {
		if v == "" {
			continue
		}
		if !found || k < lang {
			lang, found = k, true
		}
	}
	if found {
		return o.Name[lang]
	}
	return o.Code
}

// Client fetches the orgs document.
type Client struct {
	httpClient *http.Client
	url        string
	userAgent  string
}

// Option configures a Client.
type Option func(*Client)

// WithHTTPClient overrides the default HTTP client (use in tests with
// httptest). A nil client is ignored so the default stays in place instead of
// causing a nil dereference inside Fetch.
func WithHTTPClient(h *http.Client) Option {
	return func(c *Client) {
		if h != nil {
			c.httpClient = h
		}
	}
}

// WithUserAgent overrides the outgoing User-Agent header.
func WithUserAgent(ua string) Option {
	return func(c *Client) { c.userAgent = ua }
}

// NewClient constructs a Client targeting the given URL. Defaults are an
// http.Client with defaultTimeout and the defaultUserAgent header; options are
// applied in order on top of those defaults. Nil options are skipped.
func NewClient(url string, opts ...Option) *Client {
	c := &Client{
		httpClient: &http.Client{Timeout: defaultTimeout},
		url:        url,
		userAgent:  defaultUserAgent,
	}
	for _, opt := range opts {
		if opt == nil {
			continue
		}
		opt(c)
	}
	return c
}

// Fetch retrieves the orgs document and returns one Org per entry of its
// top-level "orgs" object, with Code populated from the entry's key. The
// slice follows map iteration order, so its order is not stable.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 200 OK (wrapping ErrUnexpectedStatus and quoting at
// most maxErrorBody bytes of the body), or when the body is not valid JSON.
func (c *Client) Fetch(ctx context.Context) ([]Org, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.url, nil)
	if err != nil {
		return nil, fmt.Errorf("cdn: build request: %w", err)
	}
	req.Header.Set("User-Agent", c.userAgent)
	req.Header.Set("Accept", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("cdn: get %s: %w", c.url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the body only decorates the error message, so a failed
		// or partial read is deliberately not reported.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("%w %d from %s: %s", ErrUnexpectedStatus, resp.StatusCode, c.url, string(body))
	}

	var doc struct {
		Orgs map[string]Org `json:"orgs"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
		return nil, fmt.Errorf("cdn: decode body: %w", err)
	}

	orgs := make([]Org, 0, len(doc.Orgs))
	for code, o := range doc.Orgs {
		o.Code = code // the key is the only place the org code appears
		orgs = append(orgs, o)
	}
	return orgs, nil
}
TestFetch_Happy(t *testing.T) { + var gotUA, gotAccept, gotMethod string + s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotUA = r.Header.Get("User-Agent") + gotAccept = r.Header.Get("Accept") + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(sampleOrgsJSON)) + }) + + c := NewClient(s.URL, WithUserAgent("test-agent")) + orgs, err := c.Fetch(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got, want := len(orgs), 3; got != want { + t.Fatalf("len(orgs) = %d, want %d", got, want) + } + + // Index for deterministic assertions (map iteration is random). + byCode := indexByCode(orgs) + + ttd, ok := byCode["ttd"] + if !ok { + t.Fatal("ttd missing from result") + } + if got, want := ttd.Code, "ttd"; got != want { + t.Errorf("ttd.Code = %q, want %q", got, want) + } + if got, want := ttd.Orgnr, "991825827"; got != want { + t.Errorf("ttd.Orgnr = %q, want %q", got, want) + } + if got, want := ttd.Environments, []string{"tt02", "production"}; !equalSlice(got, want) { + t.Errorf("ttd.Environments = %v, want %v", got, want) + } + if got, want := ttd.DisplayName(), "Test org TTD"; got != want { + t.Errorf("ttd.DisplayName = %q, want %q", got, want) + } + + acn := byCode["acn"] + if len(acn.Environments) != 0 { + t.Errorf("acn.Environments = %v, want empty", acn.Environments) + } + + if gotMethod != http.MethodGet { + t.Errorf("HTTP method = %q, want GET", gotMethod) + } + if gotUA != "test-agent" { + t.Errorf("User-Agent = %q, want test-agent", gotUA) + } + if !strings.Contains(gotAccept, "application/json") { + t.Errorf("Accept = %q does not contain application/json", gotAccept) + } +} + +func TestFetch_EmptyOrgs(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"orgs": {}}`)) + }) + c := NewClient(s.URL) + orgs, err := c.Fetch(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", 
err) + } + if len(orgs) != 0 { + t.Errorf("len(orgs) = %d, want 0", len(orgs)) + } +} + +func TestFetch_MalformedJSON(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"orgs": this is not json}`)) + }) + c := NewClient(s.URL) + _, err := c.Fetch(context.Background()) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "decode") { + t.Errorf("error should mention decode failure; got: %v", err) + } +} + +func TestFetch_Non200(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte("upstream is down")) + }) + c := NewClient(s.URL) + _, err := c.Fetch(context.Background()) + if err == nil { + t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrUnexpectedStatus) { + t.Errorf("expected ErrUnexpectedStatus, got %v", err) + } +} + +func TestFetch_ContextCancelled(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(200 * time.Millisecond) + _, _ = w.Write([]byte(sampleOrgsJSON)) + }) + c := NewClient(s.URL) + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) + defer cancel() + _, err := c.Fetch(ctx) + if err == nil { + t.Fatal("expected error, got nil") + } +} + +func TestDisplayName(t *testing.T) { + cases := []struct { + name string + org Org + want string + }{ + {"prefers en", Org{Code: "x", Name: map[string]string{"en": "English", "nb": "Norsk"}}, "English"}, + {"falls back to any language", Org{Code: "x", Name: map[string]string{"nb": "Norsk"}}, "Norsk"}, + {"empty en falls through", Org{Code: "x", Name: map[string]string{"en": "", "nn": "Nynorsk"}}, "Nynorsk"}, + {"no name uses code", Org{Code: "x"}, "x"}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if got := c.org.DisplayName(); got != c.want { + t.Errorf("DisplayName = %q, want %q", got, c.want) + } 
// Names of the environment variables read by LoadFrom, one per Config field.
const (
	EnvGiteaURL           = "RUNNER_ORG_SYNC_GITEA_URL"
	EnvOrgsJSONURL        = "RUNNER_ORG_SYNC_ORGS_JSON_URL"
	EnvOutputNamespace    = "RUNNER_ORG_SYNC_OUTPUT_NAMESPACE"
	EnvSecretNamePattern  = "RUNNER_ORG_SYNC_SECRET_NAME_PATTERN"
	EnvConfigMapName      = "RUNNER_ORG_SYNC_CONFIGMAP_NAME"
	EnvKeyVaultName       = "RUNNER_ORG_SYNC_KEYVAULT_NAME"
	EnvKeyVaultSecretName = "RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME"
	EnvSyncAll            = "RUNNER_ORG_SYNC_SYNC_ALL"
	EnvWhitelistedOrgs    = "RUNNER_ORG_SYNC_ORGS"
	EnvGiteaPATOverride   = "RUNNER_ORG_SYNC_GITEA_PAT"

	// OrgPlaceholder is the substring SecretNamePattern must contain;
	// SecretNameFor replaces it with the org code.
	OrgPlaceholder = "{org}"
)

// Config holds validated runtime settings. Construct only via Load/LoadFrom.
type Config struct {
	GiteaURL           string   // from EnvGiteaURL; required
	OrgsJSONURL        string   // from EnvOrgsJSONURL; required
	OutputNamespace    string   // from EnvOutputNamespace; required
	SecretNamePattern  string   // from EnvSecretNamePattern; required, must contain OrgPlaceholder
	ConfigMapName      string   // from EnvConfigMapName; required
	KeyVaultName       string   // from EnvKeyVaultName; required unless GiteaPATOverride is set
	KeyVaultSecretName string   // from EnvKeyVaultSecretName; required unless GiteaPATOverride is set
	SyncAll            bool     // from EnvSyncAll, parsed by parseBool
	WhitelistedOrgs    []string // from EnvWhitelistedOrgs, parsed by parseCSV
	GiteaPATOverride   string   // from EnvGiteaPATOverride; optional
}

// Getter abstracts os.Getenv so tests can inject a fake environment without
// mutating the process global state.
type Getter func(key string) string

// Load reads configuration from the process environment.
func Load() (Config, error) {
	return LoadFrom(os.Getenv)
}

// LoadFrom reads configuration using the supplied getter and validates it.
// Every string value is trimmed of surrounding whitespace before validation.
// All failures are collected and reported in a single joined error; on error
// the returned Config is the zero value.
func LoadFrom(get Getter) (Config, error) {
	cfg := Config{
		GiteaURL:           strings.TrimSpace(get(EnvGiteaURL)),
		OrgsJSONURL:        strings.TrimSpace(get(EnvOrgsJSONURL)),
		OutputNamespace:    strings.TrimSpace(get(EnvOutputNamespace)),
		SecretNamePattern:  strings.TrimSpace(get(EnvSecretNamePattern)),
		ConfigMapName:      strings.TrimSpace(get(EnvConfigMapName)),
		KeyVaultName:       strings.TrimSpace(get(EnvKeyVaultName)),
		KeyVaultSecretName: strings.TrimSpace(get(EnvKeyVaultSecretName)),
		SyncAll:            parseBool(get(EnvSyncAll)),
		WhitelistedOrgs:    parseCSV(get(EnvWhitelistedOrgs)),
		// Trimmed like every other field: a whitespace-only value must not
		// count as "override present" (that would skip the Key Vault checks
		// below), and a stray trailing newline must not end up in the token.
		GiteaPATOverride: strings.TrimSpace(get(EnvGiteaPATOverride)),
	}

	var errs []error
	requireField(&errs, EnvGiteaURL, cfg.GiteaURL)
	requireField(&errs, EnvOrgsJSONURL, cfg.OrgsJSONURL)
	requireField(&errs, EnvOutputNamespace, cfg.OutputNamespace)
	requireField(&errs, EnvSecretNamePattern, cfg.SecretNamePattern)
	requireField(&errs, EnvConfigMapName, cfg.ConfigMapName)

	// An empty pattern is already reported as "required" above, so only a
	// non-empty pattern is checked for the placeholder.
	if cfg.SecretNamePattern != "" && !strings.Contains(cfg.SecretNamePattern, OrgPlaceholder) {
		errs = append(errs, fmt.Errorf("%s must contain the %q placeholder", EnvSecretNamePattern, OrgPlaceholder))
	}

	// PAT must be reachable either via override (local dev) or via Key Vault (in-cluster).
	if cfg.GiteaPATOverride == "" {
		requireField(&errs, EnvKeyVaultName, cfg.KeyVaultName)
		requireField(&errs, EnvKeyVaultSecretName, cfg.KeyVaultSecretName)
	}

	// Either syncAll=true or a non-empty whitelist. An empty intersection is
	// almost certainly a misconfiguration, not an intended "sync nothing".
	if !cfg.SyncAll && len(cfg.WhitelistedOrgs) == 0 {
		errs = append(errs, fmt.Errorf("either %s=true or %s must be a non-empty CSV list", EnvSyncAll, EnvWhitelistedOrgs))
	}

	if len(errs) > 0 {
		return Config{}, fmt.Errorf("invalid configuration: %w", errors.Join(errs...))
	}
	return cfg, nil
}

// SecretNameFor renders SecretNamePattern for the given org code by replacing
// every occurrence of OrgPlaceholder with org.
func (c Config) SecretNameFor(org string) string {
	return strings.ReplaceAll(c.SecretNamePattern, OrgPlaceholder, org)
}

// PATSource returns a short human-readable label describing where the PAT
// will be sourced from: "env" when GiteaPATOverride is set, "keyvault"
// otherwise. Useful for the startup log line.
func (c Config) PATSource() string {
	if c.GiteaPATOverride != "" {
		return "env"
	}
	return "keyvault"
}

// requireField appends a "<name> is required" error to errs when value is empty.
func requireField(errs *[]error, name, value string) {
	if value == "" {
		*errs = append(*errs, fmt.Errorf("%s is required", name))
	}
}

// parseBool reports whether raw, trimmed and lower-cased, is one of
// "1", "t", "true", "yes" or "y". Every other value, including the empty
// string, is false.
func parseBool(raw string) bool {
	switch strings.ToLower(strings.TrimSpace(raw)) {
	case "1", "t", "true", "yes", "y":
		return true
	default:
		return false
	}
}

// parseCSV splits raw on commas, trims each element, and drops empty elements
// and repeats while keeping first-seen order. An empty raw string yields nil.
func parseCSV(raw string) []string {
	if raw == "" {
		return nil
	}
	parts := strings.Split(raw, ",")
	out := make([]string, 0, len(parts))
	seen := make(map[string]struct{}, len(parts))
	for _, p := range parts {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		if _, dup := seen[p]; dup {
			continue
		}
		seen[p] = struct{}{}
		out = append(out, p)
	}
	return out
}
b/src/runner-org-sync/internal/config/config_test.go @@ -0,0 +1,185 @@ +package config + +import ( + "strings" + "testing" +) + +// validEnv returns a baseline env map representing a fully valid configuration. +// Tests mutate a copy to exercise one validation branch at a time. +func validEnv() map[string]string { + return map[string]string{ + EnvGiteaURL: "http://gitea.local", + EnvOrgsJSONURL: "https://altinncdn.no/orgs/altinn-orgs.json", + EnvOutputNamespace: "studio-runners", + EnvSecretNamePattern: "altinn-gitea-runner-{org}-secret", + EnvConfigMapName: "runner-org-list", + EnvKeyVaultName: "kv-studio", + EnvKeyVaultSecretName: "gitea-admin-pat", + EnvWhitelistedOrgs: "ttd,brg,dsb", + } +} + +func getter(env map[string]string) Getter { + return func(k string) string { return env[k] } +} + +func TestLoadFrom_Valid(t *testing.T) { + cfg, err := LoadFrom(getter(validEnv())) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.GiteaURL != "http://gitea.local" { + t.Errorf("GiteaURL = %q", cfg.GiteaURL) + } + if got, want := cfg.WhitelistedOrgs, []string{"ttd", "brg", "dsb"}; !equalSlice(got, want) { + t.Errorf("WhitelistedOrgs = %v, want %v", got, want) + } + if cfg.SyncAll { + t.Errorf("SyncAll = true, want false") + } + if cfg.PATSource() != "keyvault" { + t.Errorf("PATSource = %q, want keyvault", cfg.PATSource()) + } +} + +func TestLoadFrom_PATOverrideRelaxesKeyVaultRequirement(t *testing.T) { + env := validEnv() + delete(env, EnvKeyVaultName) + delete(env, EnvKeyVaultSecretName) + env[EnvGiteaPATOverride] = "pat-xyz" + + cfg, err := LoadFrom(getter(env)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.PATSource() != "env" { + t.Errorf("PATSource = %q, want env", cfg.PATSource()) + } +} + +func TestLoadFrom_RequiredFieldsAggregated(t *testing.T) { + _, err := LoadFrom(getter(map[string]string{})) + if err == nil { + t.Fatal("expected error, got nil") + } + msg := err.Error() + // All required fields plus the whitelist 
invariant should appear in one error. + wantSubstrings := []string{ + EnvGiteaURL, + EnvOrgsJSONURL, + EnvOutputNamespace, + EnvSecretNamePattern, + EnvConfigMapName, + EnvKeyVaultName, + EnvKeyVaultSecretName, + "either RUNNER_ORG_SYNC_SYNC_ALL=true", + } + for _, s := range wantSubstrings { + if !strings.Contains(msg, s) { + t.Errorf("error does not mention %q\n full error: %s", s, msg) + } + } +} + +func TestLoadFrom_SecretNamePatternMustContainPlaceholder(t *testing.T) { + env := validEnv() + env[EnvSecretNamePattern] = "altinn-gitea-runner-secret" // no {org} + + _, err := LoadFrom(getter(env)) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), OrgPlaceholder) { + t.Errorf("error should mention %q placeholder; got: %v", OrgPlaceholder, err) + } +} + +func TestLoadFrom_SyncAllAcceptsEmptyWhitelist(t *testing.T) { + env := validEnv() + env[EnvSyncAll] = "true" + delete(env, EnvWhitelistedOrgs) + + cfg, err := LoadFrom(getter(env)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cfg.SyncAll { + t.Errorf("SyncAll = false, want true") + } + if len(cfg.WhitelistedOrgs) != 0 { + t.Errorf("WhitelistedOrgs = %v, want empty", cfg.WhitelistedOrgs) + } +} + +func TestLoadFrom_RejectsEmptyWhitelistWhenSyncAllOff(t *testing.T) { + env := validEnv() + delete(env, EnvWhitelistedOrgs) + + _, err := LoadFrom(getter(env)) + if err == nil { + t.Fatal("expected error, got nil") + } +} + +func TestParseCSV(t *testing.T) { + cases := []struct { + in string + want []string + }{ + {"", nil}, + {"ttd", []string{"ttd"}}, + {"ttd,brg,dsb", []string{"ttd", "brg", "dsb"}}, + {" ttd , brg ,dsb ", []string{"ttd", "brg", "dsb"}}, + {"ttd,,brg", []string{"ttd", "brg"}}, + {",ttd,", []string{"ttd"}}, + {"ttd,brg,ttd", []string{"ttd", "brg"}}, // dedup + } + for _, c := range cases { + got := parseCSV(c.in) + if !equalSlice(got, c.want) { + t.Errorf("parseCSV(%q) = %v, want %v", c.in, got, c.want) + } + } +} + +func TestParseBool(t 
*testing.T) { + cases := map[string]bool{ + "": false, + "true": true, + "TRUE": true, + "True": true, + "1": true, + "yes": true, + "y": true, + "t": true, + "false": false, + "0": false, + "no": false, + "junk": false, + } + for in, want := range cases { + if got := parseBool(in); got != want { + t.Errorf("parseBool(%q) = %v, want %v", in, got, want) + } + } +} + +func TestSecretNameFor(t *testing.T) { + c := Config{SecretNamePattern: "altinn-gitea-runner-{org}-secret"} + if got, want := c.SecretNameFor("ttd"), "altinn-gitea-runner-ttd-secret"; got != want { + t.Errorf("SecretNameFor = %q, want %q", got, want) + } +} + +func equalSlice(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/src/runner-org-sync/internal/gitea/gitea.go b/src/runner-org-sync/internal/gitea/gitea.go new file mode 100644 index 00000000000..73b5f5928bf --- /dev/null +++ b/src/runner-org-sync/internal/gitea/gitea.go @@ -0,0 +1,126 @@ +// Package gitea is a minimal admin client for Gitea — just enough to mint +// per-organisation Actions runner registration tokens. +// +// The endpoint targeted is Gitea's organisation-scoped runner registration +// token API. The returned token is a one-shot string that an act_runner +// process uses to register itself with Gitea; once registered the runner +// keeps its own long-lived identity. +package gitea + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +const ( + defaultTimeout = 15 * time.Second + defaultUserAgent = "runner-org-sync" + maxErrorBody = 512 +) + +// Sentinel errors. Callers can errors.Is against these to drive reconcile +// policy (e.g. ErrUnauthorized → fatal; ErrOrgNotFound → skip & continue). 
+var ( + ErrUnauthorized = errors.New("gitea: unauthorized (bad PAT)") + ErrOrgNotFound = errors.New("gitea: organisation not found") + ErrServer = errors.New("gitea: server error") +) + +// Client talks to a Gitea instance using a Personal Access Token. +type Client struct { + httpClient *http.Client + baseURL string + pat string + userAgent string +} + +// Option configures a Client. +type Option func(*Client) + +// WithHTTPClient overrides the default HTTP client. +func WithHTTPClient(h *http.Client) Option { + return func(c *Client) { c.httpClient = h } +} + +// WithUserAgent overrides the User-Agent header. +func WithUserAgent(ua string) Option { + return func(c *Client) { c.userAgent = ua } +} + +// NewClient constructs a Client. baseURL should be the Gitea instance root +// (e.g. "http://altinn-repositories-public.default.svc.cluster.local"); the +// trailing slash is normalised away. +func NewClient(baseURL, pat string, opts ...Option) *Client { + c := &Client{ + httpClient: &http.Client{Timeout: defaultTimeout}, + baseURL: strings.TrimRight(baseURL, "/"), + pat: pat, + userAgent: defaultUserAgent, + } + for _, opt := range opts { + opt(c) + } + return c +} + +// MintRegistrationToken returns a fresh runner registration token for the +// given organisation. org is the short Gitea organisation name (e.g. "ttd"). 
+func (c *Client) MintRegistrationToken(ctx context.Context, org string) (string, error) { + if org == "" { + return "", errors.New("gitea: org is required") + } + endpoint := fmt.Sprintf("%s/api/v1/orgs/%s/actions/runners/registration-token", + c.baseURL, url.PathEscape(org)) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return "", fmt.Errorf("gitea: build request: %w", err) + } + req.Header.Set("Authorization", "token "+c.pat) + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", c.userAgent) + + resp, err := c.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("gitea: get registration token for %s: %w", org, err) + } + defer resp.Body.Close() + + switch { + case resp.StatusCode == http.StatusOK: + // fall through + case resp.StatusCode == http.StatusUnauthorized, resp.StatusCode == http.StatusForbidden: + return "", fmt.Errorf("%w: status %d", ErrUnauthorized, resp.StatusCode) + case resp.StatusCode == http.StatusNotFound: + return "", fmt.Errorf("%w: %s", ErrOrgNotFound, org) + case resp.StatusCode >= 500: + body := readErrorBody(resp.Body) + return "", fmt.Errorf("%w: status %d: %s", ErrServer, resp.StatusCode, body) + default: + body := readErrorBody(resp.Body) + return "", fmt.Errorf("gitea: unexpected status %d: %s", resp.StatusCode, body) + } + + var payload struct { + Token string `json:"token"` + } + if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { + return "", fmt.Errorf("gitea: decode response for %s: %w", org, err) + } + if payload.Token == "" { + return "", fmt.Errorf("gitea: empty token in response for %s", org) + } + return payload.Token, nil +} + +func readErrorBody(r io.Reader) string { + body, _ := io.ReadAll(io.LimitReader(r, maxErrorBody)) + return string(body) +} diff --git a/src/runner-org-sync/internal/gitea/gitea_test.go b/src/runner-org-sync/internal/gitea/gitea_test.go new file mode 100644 index 00000000000..41514748c34 --- /dev/null 
+++ b/src/runner-org-sync/internal/gitea/gitea_test.go @@ -0,0 +1,154 @@ +package gitea + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func newStubServer(t *testing.T, handler http.HandlerFunc) *httptest.Server { + t.Helper() + s := httptest.NewServer(handler) + t.Cleanup(s.Close) + return s +} + +func TestMintRegistrationToken_Happy(t *testing.T) { + var gotPath, gotAuth, gotUA string + s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.Path + gotAuth = r.Header.Get("Authorization") + gotUA = r.Header.Get("User-Agent") + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"token":"reg-token-abc"}`)) + }) + + c := NewClient(s.URL+"/", "pat-xyz", WithUserAgent("ua-test")) + token, err := c.MintRegistrationToken(context.Background(), "ttd") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "reg-token-abc" { + t.Errorf("token = %q, want reg-token-abc", token) + } + if want := "/api/v1/orgs/ttd/actions/runners/registration-token"; gotPath != want { + t.Errorf("path = %q, want %q", gotPath, want) + } + if gotAuth != "token pat-xyz" { + t.Errorf("Authorization = %q, want %q", gotAuth, "token pat-xyz") + } + if gotUA != "ua-test" { + t.Errorf("User-Agent = %q, want ua-test", gotUA) + } +} + +func TestMintRegistrationToken_PathEscaped(t *testing.T) { + // Gitea org names are validated, but defence in depth: ensure path escaping + // is applied so a hostile or malformed org code cannot construct a URL. 
+ var gotPath string + s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.EscapedPath() + _, _ = w.Write([]byte(`{"token":"x"}`)) + }) + c := NewClient(s.URL, "pat") + if _, err := c.MintRegistrationToken(context.Background(), "weird/org"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(gotPath, "weird%2Forg") { + t.Errorf("path did not escape slash: %q", gotPath) + } +} + +func TestMintRegistrationToken_Unauthorized(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte("nope")) + }) + c := NewClient(s.URL, "bad-pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if !errors.Is(err, ErrUnauthorized) { + t.Errorf("want ErrUnauthorized, got %v", err) + } +} + +func TestMintRegistrationToken_Forbidden(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusForbidden) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if !errors.Is(err, ErrUnauthorized) { + t.Errorf("want ErrUnauthorized, got %v", err) + } +} + +func TestMintRegistrationToken_NotFound(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "missing-org") + if !errors.Is(err, ErrOrgNotFound) { + t.Errorf("want ErrOrgNotFound, got %v", err) + } +} + +func TestMintRegistrationToken_ServerError(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("kaboom")) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if !errors.Is(err, ErrServer) { + t.Errorf("want ErrServer, got %v", err) + } +} + +func 
TestMintRegistrationToken_EmptyToken(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"token":""}`)) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if err == nil || !strings.Contains(err.Error(), "empty token") { + t.Errorf("want empty-token error, got %v", err) + } +} + +func TestMintRegistrationToken_MalformedJSON(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{not json`)) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if err == nil || !strings.Contains(err.Error(), "decode") { + t.Errorf("want decode error, got %v", err) + } +} + +func TestMintRegistrationToken_EmptyOrg(t *testing.T) { + c := NewClient("http://example", "pat") + _, err := c.MintRegistrationToken(context.Background(), "") + if err == nil { + t.Fatal("expected error for empty org, got nil") + } +} + +func TestMintRegistrationToken_ContextCancelled(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(200 * time.Millisecond) + _, _ = w.Write([]byte(`{"token":"x"}`)) + }) + c := NewClient(s.URL, "pat") + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) + defer cancel() + _, err := c.MintRegistrationToken(ctx, "ttd") + if err == nil { + t.Fatal("expected error from cancelled context, got nil") + } +} diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go new file mode 100644 index 00000000000..140299c73b0 --- /dev/null +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -0,0 +1,164 @@ +// Package k8sstate provides the in-cluster reconcile primitives used by the +// runner-org-sync CronJob: listing the Secrets we own, creating and +// deleting per-org registration-token Secrets, and applying the runners +// ConfigMap 
idempotently. +// +// The Store is constructed around a kubernetes.Interface so the production +// path uses a real REST client while tests inject the fake clientset from +// k8s.io/client-go/kubernetes/fake. +package k8sstate + +import ( + "context" + "fmt" + "maps" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// Label keys and well-known values for resources this service owns. +const ( + LabelManagedBy = "app.kubernetes.io/managed-by" + LabelComponent = "app.kubernetes.io/component" + LabelOrg = "runner-org-sync.altinn.studio/org" + + ManagedBy = "runner-org-sync" + ComponentRegToken = "runner-registration-token" + ComponentRunnerCM = "runner-org-list" + + // SecretTokenKey is the data key inside per-org registration Secrets, + // matching what the runner Deployment's secretKeyRef expects. + SecretTokenKey = "token" +) + +// Store is the package's only entry point for cluster I/O. +type Store struct { + client kubernetes.Interface + namespace string +} + +// NewStore constructs a Store bound to a single namespace. +func NewStore(client kubernetes.Interface, namespace string) *Store { + return &Store{client: client, namespace: namespace} +} + +// Namespace returns the namespace the Store operates in. Useful for logs. +func (s *Store) Namespace() string { return s.namespace } + +// ListManagedSecrets returns all Secrets in the namespace that this service +// owns, matched by ManagedBy + Component labels. 
+func (s *Store) ListManagedSecrets(ctx context.Context) ([]corev1.Secret, error) { + selector := fmt.Sprintf("%s=%s,%s=%s", + LabelManagedBy, ManagedBy, + LabelComponent, ComponentRegToken, + ) + list, err := s.client.CoreV1().Secrets(s.namespace).List(ctx, metav1.ListOptions{ + LabelSelector: selector, + }) + if err != nil { + return nil, fmt.Errorf("k8sstate: list secrets: %w", err) + } + return list.Items, nil +} + +// SecretExists reports whether a Secret with the given name exists in the +// store's namespace. A NotFound error is reported as ok=false, nil error. +func (s *Store) SecretExists(ctx context.Context, name string) (bool, error) { + _, err := s.client.CoreV1().Secrets(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if err == nil { + return true, nil + } + if apierrors.IsNotFound(err) { + return false, nil + } + return false, fmt.Errorf("k8sstate: get secret %s: %w", name, err) +} + +// CreateRegistrationSecret creates an Opaque Secret carrying the +// registration token at key "token", labelled with ManagedBy / Component / +// Org. Returns the underlying error verbatim so callers can use apierrors.IsAlreadyExists. +func (s *Store) CreateRegistrationSecret(ctx context.Context, name, org, token string) error { + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: s.namespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: org, + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{ + SecretTokenKey: []byte(token), + }, + } + if _, err := s.client.CoreV1().Secrets(s.namespace).Create(ctx, sec, metav1.CreateOptions{}); err != nil { + return fmt.Errorf("k8sstate: create secret %s: %w", name, err) + } + return nil +} + +// DeleteSecret removes the named Secret. NotFound is treated as success so +// the operation is idempotent across reconciles. 
+func (s *Store) DeleteSecret(ctx context.Context, name string) error { + err := s.client.CoreV1().Secrets(s.namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("k8sstate: delete secret %s: %w", name, err) + } + return nil +} + +// ApplyConfigMap creates or updates the named ConfigMap so its Data matches +// the supplied value. Returns true if a write actually occurred (create or +// update), false if the existing object already matched. Labels are +// preserved on update; the managed-by label is added if missing. +func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string]string) (bool, error) { + desired := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: s.namespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRunnerCM, + }, + }, + Data: data, + } + + existing, err := s.client.CoreV1().ConfigMaps(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + if _, err := s.client.CoreV1().ConfigMaps(s.namespace).Create(ctx, desired, metav1.CreateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: create configmap %s: %w", name, err) + } + return true, nil + } + if err != nil { + return false, fmt.Errorf("k8sstate: get configmap %s: %w", name, err) + } + + if maps.Equal(existing.Data, data) { + return false, nil + } + existing.Data = data + if existing.Labels == nil { + existing.Labels = map[string]string{} + } + existing.Labels[LabelManagedBy] = ManagedBy + existing.Labels[LabelComponent] = ComponentRunnerCM + + if _, err := s.client.CoreV1().ConfigMaps(s.namespace).Update(ctx, existing, metav1.UpdateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: update configmap %s: %w", name, err) + } + return true, nil +} + +// OrgFromSecret extracts the org code from a managed Secret's label. 
Returns +// the empty string if the label is missing — callers should treat that as +// a foreign Secret and skip it. +func OrgFromSecret(s corev1.Secret) string { + return s.Labels[LabelOrg] +} diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go new file mode 100644 index 00000000000..4096aa221dd --- /dev/null +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -0,0 +1,210 @@ +package k8sstate + +import ( + "context" + "sort" + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +const testNamespace = "studio-runners" + +func TestCreateRegistrationSecret_SetsLabelsAndData(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + + if err := s.CreateRegistrationSecret(context.Background(), "altinn-gitea-runner-ttd-secret", "ttd", "tok-1"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + got, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "altinn-gitea-runner-ttd-secret", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get failed: %v", err) + } + if got.Type != corev1.SecretTypeOpaque { + t.Errorf("type = %v, want Opaque", got.Type) + } + if string(got.Data[SecretTokenKey]) != "tok-1" { + t.Errorf("data[%s] = %q, want %q", SecretTokenKey, string(got.Data[SecretTokenKey]), "tok-1") + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by = %q, want %q", got.Labels[LabelManagedBy], ManagedBy) + } + if got.Labels[LabelComponent] != ComponentRegToken { + t.Errorf("component = %q, want %q", got.Labels[LabelComponent], ComponentRegToken) + } + if got.Labels[LabelOrg] != "ttd" { + t.Errorf("org label = %q, want ttd", got.Labels[LabelOrg]) + } +} + +func TestCreateRegistrationSecret_AlreadyExists(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "x", Namespace: testNamespace}, + }) + 
s := NewStore(c, testNamespace) + err := s.CreateRegistrationSecret(context.Background(), "x", "ttd", "tok") + if err == nil { + t.Fatal("expected error for duplicate, got nil") + } +} + +func TestSecretExists(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "exists", Namespace: testNamespace}, + }) + s := NewStore(c, testNamespace) + + ok, err := s.SecretExists(context.Background(), "exists") + if err != nil || !ok { + t.Errorf("SecretExists(exists) = %v, %v; want true, nil", ok, err) + } + ok, err = s.SecretExists(context.Background(), "missing") + if err != nil || ok { + t.Errorf("SecretExists(missing) = %v, %v; want false, nil", ok, err) + } +} + +func TestDeleteSecret_IdempotentOnMissing(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + if err := s.DeleteSecret(context.Background(), "never-existed"); err != nil { + t.Errorf("delete missing should be nil, got %v", err) + } +} + +func TestDeleteSecret_RemovesExisting(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "x", Namespace: testNamespace}, + }) + s := NewStore(c, testNamespace) + if err := s.DeleteSecret(context.Background(), "x"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + ok, _ := s.SecretExists(context.Background(), "x") + if ok { + t.Errorf("secret still exists after delete") + } +} + +func TestListManagedSecrets_OnlyOurs(t *testing.T) { + managed1 := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ours-ttd", Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "ttd", + }, + }, + } + managed2 := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ours-brg", Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "brg", + }, + }, + } + foreign := &corev1.Secret{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "stranger", Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: "someone-else"}, + }, + } + c := fake.NewSimpleClientset(managed1, managed2, foreign) + s := NewStore(c, testNamespace) + + got, err := s.ListManagedSecrets(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != 2 { + t.Fatalf("got %d secrets, want 2", len(got)) + } + names := []string{got[0].Name, got[1].Name} + sort.Strings(names) + if names[0] != "ours-brg" || names[1] != "ours-ttd" { + t.Errorf("got secrets %v, want [ours-brg ours-ttd]", names) + } +} + +func TestApplyConfigMap_CreatesWhenMissing(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "runner-org-list", map[string]string{"runners.yaml": "- name: ttd\n"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (create)") + } + cm, err := c.CoreV1().ConfigMaps(testNamespace).Get(context.Background(), "runner-org-list", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get: %v", err) + } + if cm.Data["runners.yaml"] != "- name: ttd\n" { + t.Errorf("data wrong: %v", cm.Data) + } + if cm.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label missing, got %v", cm.Labels) + } +} + +func TestApplyConfigMap_NoOpOnSameContent(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cm", Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: ManagedBy}, + }, + Data: map[string]string{"k": "v"}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "cm", map[string]string{"k": "v"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if changed { + t.Error("changed = true, want false (no diff)") + } +} + +func TestApplyConfigMap_UpdatesOnDifference(t *testing.T) { + c := 
fake.NewSimpleClientset(&corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{Name: "cm", Namespace: testNamespace}, + Data: map[string]string{"k": "old"}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "cm", map[string]string{"k": "new"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (update)") + } + got, _ := c.CoreV1().ConfigMaps(testNamespace).Get(context.Background(), "cm", metav1.GetOptions{}) + if got.Data["k"] != "new" { + t.Errorf("data not updated: %v", got.Data) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not added on update, got %v", got.Labels) + } +} + +func TestOrgFromSecret(t *testing.T) { + s := corev1.Secret{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{LabelOrg: "ttd"}}} + if got := OrgFromSecret(s); got != "ttd" { + t.Errorf("OrgFromSecret = %q, want ttd", got) + } + if got := OrgFromSecret(corev1.Secret{}); got != "" { + t.Errorf("OrgFromSecret on unlabelled secret = %q, want empty", got) + } +} diff --git a/src/runner-org-sync/internal/keyvault/keyvault.go b/src/runner-org-sync/internal/keyvault/keyvault.go new file mode 100644 index 00000000000..1935ae989a6 --- /dev/null +++ b/src/runner-org-sync/internal/keyvault/keyvault.go @@ -0,0 +1,113 @@ +// Package keyvault resolves the Gitea Personal Access Token. +// +// Two sources are supported: +// +// - an env-var override (for local development; sidesteps Azure auth entirely), +// - Azure Key Vault, accessed via DefaultAzureCredential, which in-cluster +// resolves to the Workload Identity federated token automatically. +// +// The Loader returns the resolved PAT along with a Source label so callers +// can log where the value came from (an accidental env-var fallback in +// production is then immediately visible). 
package keyvault

import (
	"context"
	"errors"
	"fmt"

	"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
	"github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets"
)

// Source describes where a PAT came from. Loader.Load returns it alongside
// the value so callers can log the origin.
type Source string

const (
	// SourceEnv marks a PAT taken from the env override.
	SourceEnv Source = "env"
	// SourceKeyVault marks a PAT fetched through the configured Getter.
	SourceKeyVault Source = "keyvault"
)

// ErrNoSource is returned when the loader has neither an env override nor a
// configured Key Vault Getter.
var ErrNoSource = errors.New("keyvault: no env override and no Key Vault getter configured")

// Getter abstracts secret retrieval. The production implementation wraps
// the Azure SDK; tests inject a stub.
type Getter interface {
	// GetSecret returns the value stored under secretName.
	GetSecret(ctx context.Context, secretName string) (string, error)
}

// Loader resolves a PAT, preferring the env override over the Key Vault
// path. Construct with NewLoader; the zero value is not usable.
type Loader struct {
	envOverride string // returned as-is when non-empty; the getter is then never called
	getter      Getter // may be nil when only the env path is configured
	secretName  string // name passed to getter.GetSecret
}

// NewLoader creates a Loader. Pass an empty envOverride to disable the
// override and force the Key Vault path. Pass a nil getter when only the
// env path is configured (Load will return ErrNoSource if it has to fall
// back to Key Vault).
func NewLoader(envOverride string, getter Getter, secretName string) *Loader {
	return &Loader{
		envOverride: envOverride,
		getter:      getter,
		secretName:  secretName,
	}
}

// Load resolves the PAT. It returns the value, the source it came from, and
// any error. Source is meaningful only when err is nil.
+func (l *Loader) Load(ctx context.Context) (string, Source, error) { + if l.envOverride != "" { + return l.envOverride, SourceEnv, nil + } + if l.getter == nil { + return "", "", ErrNoSource + } + v, err := l.getter.GetSecret(ctx, l.secretName) + if err != nil { + return "", "", err + } + if v == "" { + return "", "", fmt.Errorf("keyvault: secret %q has empty value", l.secretName) + } + return v, SourceKeyVault, nil +} + +// NewAzureGetter constructs a production Getter backed by Azure Key Vault. +// It uses DefaultAzureCredential, which inside an AKS pod with the Workload +// Identity webhook prefers the federated token. vaultName is the short name +// (e.g. "studio-kv"), not the full URL. +func NewAzureGetter(vaultName string) (Getter, error) { + if vaultName == "" { + return nil, errors.New("keyvault: vaultName is required") + } + cred, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, fmt.Errorf("keyvault: build credential: %w", err) + } + vaultURL := fmt.Sprintf("https://%s.vault.azure.net", vaultName) + client, err := azsecrets.NewClient(vaultURL, cred, nil) + if err != nil { + return nil, fmt.Errorf("keyvault: build secrets client: %w", err) + } + return &azureGetter{client: client}, nil +} + +type azureGetter struct { + client *azsecrets.Client +} + +func (g *azureGetter) GetSecret(ctx context.Context, name string) (string, error) { + resp, err := g.client.GetSecret(ctx, name, "", nil) + if err != nil { + return "", fmt.Errorf("keyvault: GetSecret %q: %w", name, err) + } + if resp.Value == nil { + return "", nil + } + return *resp.Value, nil +} diff --git a/src/runner-org-sync/internal/keyvault/keyvault_test.go b/src/runner-org-sync/internal/keyvault/keyvault_test.go new file mode 100644 index 00000000000..eeed4951bce --- /dev/null +++ b/src/runner-org-sync/internal/keyvault/keyvault_test.go @@ -0,0 +1,99 @@ +package keyvault + +import ( + "context" + "errors" + "strings" + "testing" +) + +// stubGetter records how it was 
called and returns a canned response. +type stubGetter struct { + value string + err error + gotCtx context.Context + gotSecret string + calls int +} + +func (s *stubGetter) GetSecret(ctx context.Context, name string) (string, error) { + s.calls++ + s.gotCtx = ctx + s.gotSecret = name + return s.value, s.err +} + +func TestLoad_EnvOverridePrefersOverGetter(t *testing.T) { + getter := &stubGetter{value: "from-kv"} + l := NewLoader("override-pat", getter, "kv-secret-name") + + val, src, err := l.Load(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if val != "override-pat" { + t.Errorf("value = %q, want override-pat", val) + } + if src != SourceEnv { + t.Errorf("source = %q, want %q", src, SourceEnv) + } + if getter.calls != 0 { + t.Errorf("getter should not be called when env override is set; got %d calls", getter.calls) + } +} + +func TestLoad_KeyVaultPath(t *testing.T) { + getter := &stubGetter{value: "from-kv"} + l := NewLoader("", getter, "gitea-admin-pat") + + val, src, err := l.Load(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if val != "from-kv" { + t.Errorf("value = %q, want from-kv", val) + } + if src != SourceKeyVault { + t.Errorf("source = %q, want %q", src, SourceKeyVault) + } + if getter.gotSecret != "gitea-admin-pat" { + t.Errorf("getter called with secret %q, want gitea-admin-pat", getter.gotSecret) + } +} + +func TestLoad_NoOverrideNoGetter(t *testing.T) { + l := NewLoader("", nil, "name") + _, _, err := l.Load(context.Background()) + if !errors.Is(err, ErrNoSource) { + t.Errorf("want ErrNoSource, got %v", err) + } +} + +func TestLoad_GetterError(t *testing.T) { + wantErr := errors.New("kv down") + getter := &stubGetter{err: wantErr} + l := NewLoader("", getter, "name") + _, _, err := l.Load(context.Background()) + if !errors.Is(err, wantErr) { + t.Errorf("expected wrapped error, got %v", err) + } +} + +func TestLoad_EmptyValueFromKeyVault(t *testing.T) { + getter := 
&stubGetter{value: ""} + l := NewLoader("", getter, "name") + _, _, err := l.Load(context.Background()) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "empty value") { + t.Errorf("error should mention empty value; got %v", err) + } +} + +func TestNewAzureGetter_RejectsEmptyVaultName(t *testing.T) { + _, err := NewAzureGetter("") + if err == nil { + t.Fatal("expected error for empty vault name, got nil") + } +} diff --git a/src/runner-org-sync/internal/reconcile/reconcile.go b/src/runner-org-sync/internal/reconcile/reconcile.go new file mode 100644 index 00000000000..cb20d9ec460 --- /dev/null +++ b/src/runner-org-sync/internal/reconcile/reconcile.go @@ -0,0 +1,296 @@ +// Package reconcile implements the pure orchestration loop of +// runner-org-sync: fetch the org list, filter it, diff against the cluster, +// mint missing tokens, delete unwanted Secrets, and project the runners +// ConfigMap. +// +// The Reconciler depends on small interfaces and never imports OpenTelemetry +// or slog — observability is the caller's responsibility, driven by the +// Report returned from Run. This keeps unit tests free of any global setup. +package reconcile + +import ( + "context" + "errors" + "fmt" + "sort" + "strings" + + "altinn.studio/runner-org-sync/internal/cdn" + corev1 "k8s.io/api/core/v1" +) + +// Defaults used when the caller does not override. +const ( + DefaultReplicas = 1 + ConfigMapDataKey = "runners.yaml" + FilterReasonNoEnv = "no_environments" + FilterReasonWhitelist = "not_in_whitelist" +) + +// Failure stages, surfaced on Report.FailedOrgs[*].Stage. +const ( + StageMint = "mint" + StageCreate = "create" + StageDelete = "delete" +) + +// OrgSource produces the discovered org population (typically the CDN client). +type OrgSource interface { + Fetch(ctx context.Context) ([]cdn.Org, error) +} + +// TokenMinter produces a fresh registration token for an org. 
The +// implementation must be safe to call concurrently with itself; the +// Reconciler currently calls it serially, but that may change. +type TokenMinter interface { + MintRegistrationToken(ctx context.Context, org string) (string, error) +} + +// SecretStore is the cluster I/O surface the Reconciler needs. +type SecretStore interface { + ListManagedSecrets(ctx context.Context) ([]corev1.Secret, error) + SecretExists(ctx context.Context, name string) (bool, error) + CreateRegistrationSecret(ctx context.Context, name, org, token string) error + DeleteSecret(ctx context.Context, name string) error + ApplyConfigMap(ctx context.Context, name string, data map[string]string) (bool, error) +} + +// Outcome summarises how a run ended. +type Outcome string + +const ( + OutcomeSuccess Outcome = "success" + OutcomePartial Outcome = "partial" + OutcomeFailure Outcome = "failure" +) + +// OrgFailure records a single per-org error captured during reconciliation. +// It does not abort the run; the org is simply omitted from this tick's +// ConfigMap so the chart never references a Secret that does not exist. +type OrgFailure struct { + Org string + Stage string + Err error +} + +// Report is the structured result of a single Run. The caller derives all +// telemetry (logs, metrics, span events) from this value. +type Report struct { + Outcome Outcome + Discovered int + FilteredNoEnv []string + FilteredWhitelist []string + Desired []string + SecretsCreated []string + SecretsDeleted []string + SecretsSkipped []string + FailedOrgs []OrgFailure + ConfigMapChanged bool +} + +// Reconciler ties the dependencies together. Construct with New. +type Reconciler struct { + source OrgSource + minter TokenMinter + store SecretStore + secretNameFor func(org string) string + configMapName string + whitelist map[string]struct{} + syncAll bool +} + +// Options configure a Reconciler. The zero value is invalid; all four +// dependency fields are required by New. 
+type Options struct { + Source OrgSource + Minter TokenMinter + Store SecretStore + SecretNameFor func(org string) string + ConfigMapName string + Whitelist []string // empty + SyncAll=false → error at construction + SyncAll bool +} + +// New constructs a Reconciler from validated Options. +func New(opts Options) (*Reconciler, error) { + switch { + case opts.Source == nil: + return nil, errors.New("reconcile: Source is required") + case opts.Minter == nil: + return nil, errors.New("reconcile: Minter is required") + case opts.Store == nil: + return nil, errors.New("reconcile: Store is required") + case opts.SecretNameFor == nil: + return nil, errors.New("reconcile: SecretNameFor is required") + case opts.ConfigMapName == "": + return nil, errors.New("reconcile: ConfigMapName is required") + case !opts.SyncAll && len(opts.Whitelist) == 0: + return nil, errors.New("reconcile: either SyncAll=true or a non-empty Whitelist is required") + } + wl := make(map[string]struct{}, len(opts.Whitelist)) + for _, w := range opts.Whitelist { + wl[w] = struct{}{} + } + return &Reconciler{ + source: opts.Source, + minter: opts.Minter, + store: opts.Store, + secretNameFor: opts.SecretNameFor, + configMapName: opts.ConfigMapName, + whitelist: wl, + syncAll: opts.SyncAll, + }, nil +} + +// Run executes one full reconciliation cycle. It returns a non-nil error +// only for fatal failures (CDN unreachable, listing Secrets fails, applying +// the ConfigMap fails). Per-org failures are captured in Report.FailedOrgs; +// the function still returns nil error and Outcome=Partial so the CronJob +// exits zero and the next tick retries. 
+func (r *Reconciler) Run(ctx context.Context) (Report, error) { + report := Report{Outcome: OutcomeFailure} + + orgs, err := r.source.Fetch(ctx) + if err != nil { + return report, fmt.Errorf("reconcile: fetch orgs: %w", err) + } + report.Discovered = len(orgs) + + desired := r.filter(orgs, &report) + report.Desired = orgCodes(desired) + sort.Strings(report.Desired) + + existing, err := r.store.ListManagedSecrets(ctx) + if err != nil { + return report, fmt.Errorf("reconcile: list managed secrets: %w", err) + } + + // For each desired org, ensure its Secret exists. Per-org failures are + // recorded but do not abort the run. + orgHasSecret := make(map[string]bool, len(desired)) + for _, org := range desired { + name := r.secretNameFor(org.Code) + exists, err := r.store.SecretExists(ctx, name) + if err != nil { + // SecretExists hitting a transient apiserver error is fatal for + // this run — without this lookup we cannot decide mint-or-skip. + return report, fmt.Errorf("reconcile: check secret %s: %w", name, err) + } + if exists { + report.SecretsSkipped = append(report.SecretsSkipped, org.Code) + orgHasSecret[org.Code] = true + continue + } + token, err := r.minter.MintRegistrationToken(ctx, org.Code) + if err != nil { + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org.Code, Stage: StageMint, Err: err}) + continue + } + if err := r.store.CreateRegistrationSecret(ctx, name, org.Code, token); err != nil { + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org.Code, Stage: StageCreate, Err: err}) + continue + } + report.SecretsCreated = append(report.SecretsCreated, org.Code) + orgHasSecret[org.Code] = true + } + + // Delete Secrets we own whose org is no longer desired. 
+ desiredSet := make(map[string]struct{}, len(desired)) + for _, o := range desired { + desiredSet[o.Code] = struct{}{} + } + for _, sec := range existing { + org := sec.Labels["runner-org-sync.altinn.studio/org"] + if org == "" { + // Defence in depth: a managed Secret missing the org label is a + // drift signal; skip rather than delete on uncertain attribution. + continue + } + if _, keep := desiredSet[org]; keep { + continue + } + if err := r.store.DeleteSecret(ctx, sec.Name); err != nil { + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org, Stage: StageDelete, Err: err}) + continue + } + report.SecretsDeleted = append(report.SecretsDeleted, org) + } + + // Project the ConfigMap from orgs whose Secret currently exists. This is + // what guarantees the chart never points at a missing Secret: if a mint + // failed earlier this run, the org silently drops out this tick. + projected := make([]string, 0, len(desired)) + for _, o := range desired { + if orgHasSecret[o.Code] { + projected = append(projected, o.Code) + } + } + sort.Strings(projected) + sort.Strings(report.SecretsCreated) + sort.Strings(report.SecretsDeleted) + sort.Strings(report.SecretsSkipped) + + data := map[string]string{ + ConfigMapDataKey: renderRunners(projected, r.secretNameFor), + } + changed, err := r.store.ApplyConfigMap(ctx, r.configMapName, data) + if err != nil { + return report, fmt.Errorf("reconcile: apply configmap %s: %w", r.configMapName, err) + } + report.ConfigMapChanged = changed + + if len(report.FailedOrgs) > 0 { + report.Outcome = OutcomePartial + } else { + report.Outcome = OutcomeSuccess + } + return report, nil +} + +// filter applies the environments-non-empty and whitelist filters, +// recording filtered-out org codes in the report for visibility. 
+func (r *Reconciler) filter(orgs []cdn.Org, report *Report) []cdn.Org { + out := make([]cdn.Org, 0, len(orgs)) + for _, o := range orgs { + if len(o.Environments) == 0 { + report.FilteredNoEnv = append(report.FilteredNoEnv, o.Code) + continue + } + if !r.syncAll { + if _, ok := r.whitelist[o.Code]; !ok { + report.FilteredWhitelist = append(report.FilteredWhitelist, o.Code) + continue + } + } + out = append(out, o) + } + sort.Strings(report.FilteredNoEnv) + sort.Strings(report.FilteredWhitelist) + sort.Slice(out, func(i, j int) bool { return out[i].Code < out[j].Code }) + return out +} + +// renderRunners emits the YAML list consumed by the gitea-org-runner-config +// HelmRelease via Flux valuesFrom (targetPath: runners). Determinism via +// sorted input is required so unchanged state produces unchanged output and +// ApplyConfigMap detects "no change" correctly. +func renderRunners(orgs []string, secretNameFor func(org string) string) string { + if len(orgs) == 0 { + return "[]\n" + } + var b strings.Builder + for _, org := range orgs { + fmt.Fprintf(&b, "- name: %s\n", org) + fmt.Fprintf(&b, " replicas: %d\n", DefaultReplicas) + fmt.Fprintf(&b, " registrationTokenSecretName: %s\n", secretNameFor(org)) + } + return b.String() +} + +func orgCodes(orgs []cdn.Org) []string { + out := make([]string, 0, len(orgs)) + for _, o := range orgs { + out = append(out, o.Code) + } + return out +} diff --git a/src/runner-org-sync/internal/reconcile/reconcile_test.go b/src/runner-org-sync/internal/reconcile/reconcile_test.go new file mode 100644 index 00000000000..e46e15125af --- /dev/null +++ b/src/runner-org-sync/internal/reconcile/reconcile_test.go @@ -0,0 +1,476 @@ +package reconcile + +import ( + "context" + "errors" + "sort" + "strings" + "testing" + + "altinn.studio/runner-org-sync/internal/cdn" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// --- stub dependencies ------------------------------------------------------ + +type 
stubSource struct { + orgs []cdn.Org + err error +} + +func (s *stubSource) Fetch(_ context.Context) ([]cdn.Org, error) { return s.orgs, s.err } + +type stubMinter struct { + // per-org overrides: token to return or error to raise. + tokens map[string]string + errs map[string]error + calls []string +} + +func (m *stubMinter) MintRegistrationToken(_ context.Context, org string) (string, error) { + m.calls = append(m.calls, org) + if err, ok := m.errs[org]; ok { + return "", err + } + if t, ok := m.tokens[org]; ok { + return t, nil + } + return "tok-" + org, nil +} + +type stubStore struct { + managed []corev1.Secret + existsByName map[string]bool + createErr map[string]error + deleteErr map[string]error + applyCMErr error + listErr error + existsErr error + createdSecrets []string + createdOrgs map[string]string + deletedSecrets []string + appliedCMData map[string]string + appliedCMChange bool +} + +func newStubStore() *stubStore { + return &stubStore{ + existsByName: map[string]bool{}, + createErr: map[string]error{}, + deleteErr: map[string]error{}, + createdOrgs: map[string]string{}, + appliedCMChange: true, + } +} + +func (s *stubStore) ListManagedSecrets(_ context.Context) ([]corev1.Secret, error) { + return s.managed, s.listErr +} + +func (s *stubStore) SecretExists(_ context.Context, name string) (bool, error) { + if s.existsErr != nil { + return false, s.existsErr + } + return s.existsByName[name], nil +} + +func (s *stubStore) CreateRegistrationSecret(_ context.Context, name, org, _ string) error { + if err, ok := s.createErr[name]; ok { + return err + } + s.createdSecrets = append(s.createdSecrets, name) + s.createdOrgs[name] = org + s.existsByName[name] = true + return nil +} + +func (s *stubStore) DeleteSecret(_ context.Context, name string) error { + if err, ok := s.deleteErr[name]; ok { + return err + } + s.deletedSecrets = append(s.deletedSecrets, name) + return nil +} + +func (s *stubStore) ApplyConfigMap(_ context.Context, _ string, data 
map[string]string) (bool, error) { + if s.applyCMErr != nil { + return false, s.applyCMErr + } + s.appliedCMData = data + return s.appliedCMChange, nil +} + +// --- helpers ---------------------------------------------------------------- + +func secretNameFor(org string) string { return "altinn-gitea-runner-" + org + "-secret" } + +func managedSecret(name, org string) corev1.Secret { + return corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Labels: map[string]string{ + "runner-org-sync.altinn.studio/org": org, + "app.kubernetes.io/managed-by": "runner-org-sync", + }, + }, + } +} + +func runReconciler(t *testing.T, src *stubSource, minter *stubMinter, store *stubStore, whitelist []string, syncAll bool) Report { + t.Helper() + r, err := New(Options{ + Source: src, + Minter: minter, + Store: store, + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: whitelist, + SyncAll: syncAll, + }) + if err != nil { + t.Fatalf("New: %v", err) + } + rep, err := r.Run(context.Background()) + if err != nil { + t.Fatalf("Run: %v", err) + } + return rep +} + +// --- scenarios from the design ---------------------------------------------- + +// Scenario 1: cold start, three orgs, all desired, no existing Secrets. 
+func TestRun_ColdStart(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02", "production"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "dsb", Environments: []string{"tt02"}}, + }} + minter := &stubMinter{} + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg", "dsb"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + wantCreated := []string{"brg", "dsb", "ttd"} + if !equalSlice(rep.SecretsCreated, wantCreated) { + t.Errorf("SecretsCreated = %v, want %v", rep.SecretsCreated, wantCreated) + } + if !equalSlice(minter.calls, wantCreated) { + t.Errorf("minter calls = %v, want %v (sorted)", minter.calls, wantCreated) + } + if !rep.ConfigMapChanged { + t.Errorf("ConfigMapChanged = false, want true on cold start") + } + wantBody := strings.Join([]string{ + "- name: brg", + " replicas: 1", + " registrationTokenSecretName: altinn-gitea-runner-brg-secret", + "- name: dsb", + " replicas: 1", + " registrationTokenSecretName: altinn-gitea-runner-dsb-secret", + "- name: ttd", + " replicas: 1", + " registrationTokenSecretName: altinn-gitea-runner-ttd-secret", + "", + }, "\n") + if got := store.appliedCMData[ConfigMapDataKey]; got != wantBody { + t.Errorf("ConfigMap body =\n%q\nwant\n%q", got, wantBody) + } +} + +// Scenario 2: re-run with no upstream change → no Secret writes, no mint calls. +func TestRun_IdempotentReRun(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + }} + minter := &stubMinter{} + store := newStubStore() + // pre-populate existing state — secrets exist for both orgs and we own them. 
+ store.existsByName["altinn-gitea-runner-ttd-secret"] = true + store.existsByName["altinn-gitea-runner-brg-secret"] = true + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + managedSecret("altinn-gitea-runner-brg-secret", "brg"), + } + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if len(minter.calls) != 0 { + t.Errorf("minter should not be called on idempotent re-run; got %v", minter.calls) + } + if len(store.createdSecrets) != 0 { + t.Errorf("no creates expected; got %v", store.createdSecrets) + } + if len(store.deletedSecrets) != 0 { + t.Errorf("no deletes expected; got %v", store.deletedSecrets) + } + wantSkipped := []string{"brg", "ttd"} + if !equalSlice(rep.SecretsSkipped, wantSkipped) { + t.Errorf("SecretsSkipped = %v, want %v", rep.SecretsSkipped, wantSkipped) + } +} + +// Scenario 3: org added to desired set → exactly one mint + create. 
+func TestRun_OrgAdded(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "dsb", Environments: []string{"production"}}, // new + }} + minter := &stubMinter{} + store := newStubStore() + store.existsByName["altinn-gitea-runner-ttd-secret"] = true + store.existsByName["altinn-gitea-runner-brg-secret"] = true + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + managedSecret("altinn-gitea-runner-brg-secret", "brg"), + } + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg", "dsb"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if !equalSlice(minter.calls, []string{"dsb"}) { + t.Errorf("minter calls = %v, want [dsb]", minter.calls) + } + if !equalSlice(rep.SecretsCreated, []string{"dsb"}) { + t.Errorf("SecretsCreated = %v, want [dsb]", rep.SecretsCreated) + } +} + +// Scenario 4: org removed from CDN → its Secret is deleted, ConfigMap reflects. 
+func TestRun_OrgRemoved(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + // brg is gone from CDN + }} + minter := &stubMinter{} + store := newStubStore() + store.existsByName["altinn-gitea-runner-ttd-secret"] = true + store.existsByName["altinn-gitea-runner-brg-secret"] = true + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + managedSecret("altinn-gitea-runner-brg-secret", "brg"), + } + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if !equalSlice(store.deletedSecrets, []string{"altinn-gitea-runner-brg-secret"}) { + t.Errorf("deletedSecrets = %v", store.deletedSecrets) + } + if !equalSlice(rep.SecretsDeleted, []string{"brg"}) { + t.Errorf("SecretsDeleted = %v, want [brg]", rep.SecretsDeleted) + } +} + +// Scenario 5: org with empty environments → filtered out, no work for it. +func TestRun_FilteredByEmptyEnvironments(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "acn", Environments: nil}, // test org, no envs → filter out + }} + minter := &stubMinter{} + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "acn"}, false) + + if !equalSlice(rep.FilteredNoEnv, []string{"acn"}) { + t.Errorf("FilteredNoEnv = %v, want [acn]", rep.FilteredNoEnv) + } + if !equalSlice(rep.Desired, []string{"ttd"}) { + t.Errorf("Desired = %v, want [ttd]", rep.Desired) + } + if containsString(minter.calls, "acn") { + t.Errorf("acn should not be minted; got calls %v", minter.calls) + } +} + +// Scenario 6: whitelist excludes an otherwise-eligible org. 
+func TestRun_FilteredByWhitelist(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "extra", Environments: []string{"production"}}, // not in whitelist + }} + minter := &stubMinter{} + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg"}, false) + + if !equalSlice(rep.FilteredWhitelist, []string{"extra"}) { + t.Errorf("FilteredWhitelist = %v, want [extra]", rep.FilteredWhitelist) + } + if !equalSlice(rep.Desired, []string{"brg", "ttd"}) { + t.Errorf("Desired = %v, want [brg ttd]", rep.Desired) + } +} + +// Scenario 7: Gitea fails for one org, others succeed; failed org omitted from CM. +func TestRun_GiteaPartialFailure(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "dsb", Environments: []string{"production"}}, + }} + minter := &stubMinter{ + errs: map[string]error{"brg": errors.New("gitea 500")}, + } + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg", "dsb"}, false) + + if rep.Outcome != OutcomePartial { + t.Errorf("outcome = %v, want partial", rep.Outcome) + } + if len(rep.FailedOrgs) != 1 || rep.FailedOrgs[0].Org != "brg" || rep.FailedOrgs[0].Stage != StageMint { + t.Errorf("FailedOrgs = %v, want [{brg mint ...}]", rep.FailedOrgs) + } + if containsString(rep.SecretsCreated, "brg") { + t.Errorf("brg should not be in SecretsCreated; got %v", rep.SecretsCreated) + } + if !strings.Contains(store.appliedCMData[ConfigMapDataKey], "name: ttd") { + t.Errorf("ConfigMap should include ttd") + } + if strings.Contains(store.appliedCMData[ConfigMapDataKey], "name: brg") { + t.Errorf("ConfigMap should NOT include brg (mint failed)") + } +} + +// --- additional coverage ---------------------------------------------------- + +func TestRun_FatalOnSourceError(t 
*testing.T) { + r, _ := New(Options{ + Source: &stubSource{err: errors.New("cdn down")}, + Minter: &stubMinter{}, + Store: newStubStore(), + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: []string{"ttd"}, + }) + _, err := r.Run(context.Background()) + if err == nil { + t.Fatal("expected fatal error, got nil") + } +} + +func TestRun_FatalOnApplyConfigMapError(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{{Code: "ttd", Environments: []string{"tt02"}}}} + store := newStubStore() + store.applyCMErr = errors.New("apiserver hiccup") + + r, _ := New(Options{ + Source: src, + Minter: &stubMinter{}, + Store: store, + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: []string{"ttd"}, + }) + _, err := r.Run(context.Background()) + if err == nil { + t.Fatal("expected fatal error, got nil") + } +} + +func TestRun_SyncAllSkipsWhitelist(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + }} + r, _ := New(Options{ + Source: src, + Minter: &stubMinter{}, + Store: newStubStore(), + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + SyncAll: true, + }) + rep, err := r.Run(context.Background()) + if err != nil { + t.Fatalf("Run: %v", err) + } + if !equalSlice(rep.Desired, []string{"brg", "ttd"}) { + t.Errorf("Desired = %v, want [brg ttd]", rep.Desired) + } + if len(rep.FilteredWhitelist) != 0 { + t.Errorf("nothing should be filtered by whitelist when SyncAll=true; got %v", rep.FilteredWhitelist) + } +} + +func TestRun_UnlabelledManagedSecretIsSkippedOnDelete(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{{Code: "ttd", Environments: []string{"tt02"}}}} + store := newStubStore() + store.existsByName["altinn-gitea-runner-ttd-secret"] = true + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + // drift: managed-by label but no org label + {ObjectMeta: 
metav1.ObjectMeta{Name: "stray", Labels: map[string]string{"app.kubernetes.io/managed-by": "runner-org-sync"}}}, + } + rep := runReconciler(t, src, &stubMinter{}, store, []string{"ttd"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if containsString(store.deletedSecrets, "stray") { + t.Errorf("stray secret should not be deleted without org label; got deletes %v", store.deletedSecrets) + } +} + +func TestNew_Validation(t *testing.T) { + cases := []struct { + name string + opts Options + }{ + {"no source", Options{Minter: &stubMinter{}, Store: newStubStore(), SecretNameFor: secretNameFor, ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no minter", Options{Source: &stubSource{}, Store: newStubStore(), SecretNameFor: secretNameFor, ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no store", Options{Source: &stubSource{}, Minter: &stubMinter{}, SecretNameFor: secretNameFor, ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no secretNameFor", Options{Source: &stubSource{}, Minter: &stubMinter{}, Store: newStubStore(), ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no configMapName", Options{Source: &stubSource{}, Minter: &stubMinter{}, Store: newStubStore(), SecretNameFor: secretNameFor, Whitelist: []string{"a"}}}, + {"empty whitelist & !syncAll", Options{Source: &stubSource{}, Minter: &stubMinter{}, Store: newStubStore(), SecretNameFor: secretNameFor, ConfigMapName: "x"}}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + if _, err := New(c.opts); err == nil { + t.Errorf("expected error, got nil") + } + }) + } +} + +// --- utilities -------------------------------------------------------------- + +func equalSlice(a, b []string) bool { + if len(a) != len(b) { + return false + } + ac, bc := append([]string(nil), a...), append([]string(nil), b...) 
+ sort.Strings(ac) + sort.Strings(bc) + for i := range ac { + if ac[i] != bc[i] { + return false + } + } + return true +} + +func containsString(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} diff --git a/src/runner-org-sync/internal/telemetry/metrics.go b/src/runner-org-sync/internal/telemetry/metrics.go new file mode 100644 index 00000000000..409a8b36b62 --- /dev/null +++ b/src/runner-org-sync/internal/telemetry/metrics.go @@ -0,0 +1,108 @@ +package telemetry + +import ( + "context" + "fmt" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +// Metrics is the typed bundle of instruments emitted by runner-org-sync. +// Construct once at startup with NewMetrics; record from the run summary +// after Reconciler.Run returns. +type Metrics struct { + ReconcileDuration metric.Float64Histogram + ReconcileRuns metric.Int64Counter + OrgsDiscovered metric.Int64Gauge + OrgsDesired metric.Int64Gauge + OrgsFiltered metric.Int64Counter + SecretsCreated metric.Int64Counter + SecretsDeleted metric.Int64Counter + SecretsSkipped metric.Int64Counter + OrgReconcileErrors metric.Int64Counter + GiteaCallDuration metric.Float64Histogram + KeyVaultDuration metric.Float64Histogram + CDNCallDuration metric.Float64Histogram + ConfigMapApplied metric.Int64Counter +} + +// NewMetrics constructs every instrument from the supplied Meter. Returns +// an error if any instrument cannot be created; in practice this only fires +// on misconfigured SDKs. 
+func NewMetrics(m metric.Meter) (*Metrics, error) { + mk := func(target *metric.Float64Histogram, name, desc, unit string) error { + h, err := m.Float64Histogram(name, metric.WithDescription(desc), metric.WithUnit(unit)) + if err != nil { + return fmt.Errorf("telemetry: histogram %s: %w", name, err) + } + *target = h + return nil + } + mc := func(target *metric.Int64Counter, name, desc string) error { + c, err := m.Int64Counter(name, metric.WithDescription(desc)) + if err != nil { + return fmt.Errorf("telemetry: counter %s: %w", name, err) + } + *target = c + return nil + } + mg := func(target *metric.Int64Gauge, name, desc string) error { + g, err := m.Int64Gauge(name, metric.WithDescription(desc)) + if err != nil { + return fmt.Errorf("telemetry: gauge %s: %w", name, err) + } + *target = g + return nil + } + + out := &Metrics{} + if err := mk(&out.ReconcileDuration, "runner_org_sync.reconcile.duration", "End-to-end reconcile run duration.", "s"); err != nil { + return nil, err + } + if err := mc(&out.ReconcileRuns, "runner_org_sync.reconcile.runs", "Reconcile run count by outcome."); err != nil { + return nil, err + } + if err := mg(&out.OrgsDiscovered, "runner_org_sync.orgs.discovered", "Orgs returned by the CDN."); err != nil { + return nil, err + } + if err := mg(&out.OrgsDesired, "runner_org_sync.orgs.desired", "Orgs after environment + whitelist filter."); err != nil { + return nil, err + } + if err := mc(&out.OrgsFiltered, "runner_org_sync.orgs.filtered", "Orgs filtered out, by reason."); err != nil { + return nil, err + } + if err := mc(&out.SecretsCreated, "runner_org_sync.secrets.created", "Per-org Secrets created this run."); err != nil { + return nil, err + } + if err := mc(&out.SecretsDeleted, "runner_org_sync.secrets.deleted", "Per-org Secrets deleted this run."); err != nil { + return nil, err + } + if err := mc(&out.SecretsSkipped, "runner_org_sync.secrets.skipped", "Per-org Secrets left untouched (already existed)."); err != nil { + return nil, 
err + } + if err := mc(&out.OrgReconcileErrors, "runner_org_sync.org.reconcile_errors", "Per-org reconcile failures by stage."); err != nil { + return nil, err + } + if err := mk(&out.GiteaCallDuration, "runner_org_sync.gitea.call.duration", "Gitea admin API call duration.", "s"); err != nil { + return nil, err + } + if err := mk(&out.KeyVaultDuration, "runner_org_sync.keyvault.call.duration", "Azure Key Vault secret fetch duration.", "s"); err != nil { + return nil, err + } + if err := mk(&out.CDNCallDuration, "runner_org_sync.cdn.call.duration", "CDN fetch duration for altinn-orgs.json.", "s"); err != nil { + return nil, err + } + if err := mc(&out.ConfigMapApplied, "runner_org_sync.configmap.applied", "ConfigMap apply attempts by changed=true|false."); err != nil { + return nil, err + } + return out, nil +} + +// RecordFiltered increments OrgsFiltered with the given reason attribute. +func (m *Metrics) RecordFiltered(ctx context.Context, reason string, n int) { + if n <= 0 { + return + } + m.OrgsFiltered.Add(ctx, int64(n), metric.WithAttributes(attribute.String("reason", reason))) +} diff --git a/src/runner-org-sync/internal/telemetry/telemetry.go b/src/runner-org-sync/internal/telemetry/telemetry.go new file mode 100644 index 00000000000..a58717fbbac --- /dev/null +++ b/src/runner-org-sync/internal/telemetry/telemetry.go @@ -0,0 +1,131 @@ +// Package telemetry configures OpenTelemetry traces and metrics, plus a +// structured slog logger writing JSON to stdout. +// +// Init returns a Telemetry value carrying ready-to-use Logger, Tracer, and +// Meter, and a Shutdown closer that flushes both the trace and metric +// pipelines. When OTEL_EXPORTER_OTLP_ENDPOINT is unset the OTLP exporters +// are skipped entirely — the no-op providers from the OTel SDK keep working +// so callers do not need conditional code paths. 
+package telemetry + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/resource" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + "go.opentelemetry.io/otel/trace" +) + +// Telemetry exposes the three observability handles the rest of the service +// uses. None of them require a non-nil OTLP endpoint to be safe to call. +type Telemetry struct { + Logger *slog.Logger + Tracer trace.Tracer + Meter metric.Meter +} + +// Shutdown flushes and stops the OTel pipelines. Always call on exit, with +// a short bounded context (10s is plenty). +type Shutdown func(ctx context.Context) error + +// scope is the instrumentation scope name used for the tracer and meter. +const scope = "altinn.studio/runner-org-sync" + +// Init configures providers and returns ready-to-use handles. serviceName +// defaults to "runner-org-sync" when empty and overrides any value the SDK +// would otherwise pick up from OTEL_SERVICE_NAME. +func Init(ctx context.Context, serviceName string) (*Telemetry, Shutdown, error) { + if serviceName == "" { + serviceName = "runner-org-sync" + } + + logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelInfo, + })) + slog.SetDefault(logger) + + res, err := resource.New(ctx, + resource.WithAttributes(semconv.ServiceName(serviceName)), + resource.WithFromEnv(), // OTEL_RESOURCE_ATTRIBUTES + resource.WithProcessPID(), + resource.WithHost(), + ) + if err != nil { + return nil, nil, fmt.Errorf("telemetry: resource: %w", err) + } + + // If no OTLP endpoint is configured (typical for local dev) skip exporters + // entirely. 
The default global TracerProvider / MeterProvider are no-ops, + // so call sites do not need conditional logic. + if !otlpEndpointConfigured() { + t := &Telemetry{ + Logger: logger, + Tracer: otel.Tracer(scope), + Meter: otel.Meter(scope), + } + return t, func(context.Context) error { return nil }, nil + } + + traceExp, err := otlptracegrpc.New(ctx) + if err != nil { + return nil, nil, fmt.Errorf("telemetry: trace exporter: %w", err) + } + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(traceExp), + sdktrace.WithResource(res), + sdktrace.WithSampler(sdktrace.AlwaysSample()), + ) + otel.SetTracerProvider(tp) + + metricExp, err := otlpmetricgrpc.New(ctx) + if err != nil { + // Best-effort cleanup of the already-installed trace exporter so we + // do not leave background goroutines if Init returns an error. + _ = tp.Shutdown(ctx) + return nil, nil, fmt.Errorf("telemetry: metric exporter: %w", err) + } + mp := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(res), + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp, + sdkmetric.WithInterval(15*time.Second), + )), + ) + otel.SetMeterProvider(mp) + + shutdown := func(ctx context.Context) error { + return errors.Join( + tp.Shutdown(ctx), + mp.Shutdown(ctx), + ) + } + + return &Telemetry{ + Logger: logger, + Tracer: otel.Tracer(scope), + Meter: otel.Meter(scope), + }, shutdown, nil +} + +func otlpEndpointConfigured() bool { + for _, k := range []string{ + "OTEL_EXPORTER_OTLP_ENDPOINT", + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", + } { + if os.Getenv(k) != "" { + return true + } + } + return false +} From fa4ac168b64fd548a925afcd5ddbe4b6796b7876 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Tue, 12 May 2026 12:36:54 +0200 Subject: [PATCH 02/33] fix gitea api to be in sync with 1.26 version --- src/runner-org-sync/internal/gitea/gitea.go | 9 ++++++++- src/runner-org-sync/internal/gitea/gitea_test.go | 7 ++++++- 2 
files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/runner-org-sync/internal/gitea/gitea.go b/src/runner-org-sync/internal/gitea/gitea.go index 73b5f5928bf..c51f211a3e8 100644 --- a/src/runner-org-sync/internal/gitea/gitea.go +++ b/src/runner-org-sync/internal/gitea/gitea.go @@ -72,6 +72,13 @@ func NewClient(baseURL, pat string, opts ...Option) *Client { // MintRegistrationToken returns a fresh runner registration token for the // given organisation. org is the short Gitea organisation name (e.g. "ttd"). +// +// The endpoint requires HTTP POST in Gitea 1.26+ (the GET form was removed). +// Tokens themselves have no time-based expiry. However, each POST atomically +// deactivates every previously-issued token for the same org — Gitea allows +// at most one active org-scoped registration token at a time. Callers must +// therefore mint only when no usable token exists, otherwise any not-yet- +// registered runner using an older Secret value will fail to register. func (c *Client) MintRegistrationToken(ctx context.Context, org string) (string, error) { if org == "" { return "", errors.New("gitea: org is required") @@ -79,7 +86,7 @@ func (c *Client) MintRegistrationToken(ctx context.Context, org string) (string, endpoint := fmt.Sprintf("%s/api/v1/orgs/%s/actions/runners/registration-token", c.baseURL, url.PathEscape(org)) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, nil) if err != nil { return "", fmt.Errorf("gitea: build request: %w", err) } diff --git a/src/runner-org-sync/internal/gitea/gitea_test.go b/src/runner-org-sync/internal/gitea/gitea_test.go index 41514748c34..03f605c21c3 100644 --- a/src/runner-org-sync/internal/gitea/gitea_test.go +++ b/src/runner-org-sync/internal/gitea/gitea_test.go @@ -18,8 +18,9 @@ func newStubServer(t *testing.T, handler http.HandlerFunc) *httptest.Server { } func TestMintRegistrationToken_Happy(t *testing.T) { - var 
gotPath, gotAuth, gotUA string + var gotMethod, gotPath, gotAuth, gotUA string s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method gotPath = r.URL.Path gotAuth = r.Header.Get("Authorization") gotUA = r.Header.Get("User-Agent") @@ -35,6 +36,10 @@ func TestMintRegistrationToken_Happy(t *testing.T) { if token != "reg-token-abc" { t.Errorf("token = %q, want reg-token-abc", token) } + // Gitea 1.26+ requires POST; the legacy GET form was removed. + if gotMethod != http.MethodPost { + t.Errorf("method = %q, want POST", gotMethod) + } if want := "/api/v1/orgs/ttd/actions/runners/registration-token"; gotPath != want { t.Errorf("path = %q, want %q", gotPath, want) } From c0fb1144231263e6166b402ef106a66e058b31cf Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Tue, 12 May 2026 23:14:22 +0200 Subject: [PATCH 03/33] remove replica from configmap --- src/runner-org-sync/internal/reconcile/reconcile.go | 6 ++++-- src/runner-org-sync/internal/reconcile/reconcile_test.go | 3 --- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/runner-org-sync/internal/reconcile/reconcile.go b/src/runner-org-sync/internal/reconcile/reconcile.go index cb20d9ec460..8f84d596d48 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile.go +++ b/src/runner-org-sync/internal/reconcile/reconcile.go @@ -21,7 +21,6 @@ import ( // Defaults used when the caller does not override. const ( - DefaultReplicas = 1 ConfigMapDataKey = "runners.yaml" FilterReasonNoEnv = "no_environments" FilterReasonWhitelist = "not_in_whitelist" @@ -274,6 +273,10 @@ func (r *Reconciler) filter(orgs []cdn.Org, report *Report) []cdn.Org { // HelmRelease via Flux valuesFrom (targetPath: runners). Determinism via // sorted input is required so unchanged state produces unchanged output and // ApplyConfigMap detects "no change" correctly. 
+// +// Replica count is deliberately omitted: scaling is owned by KEDA ScaledJobs +// on the consumer side, so a runner-org-sync-supplied replicas field would +// be ignored at best and misleading at worst. func renderRunners(orgs []string, secretNameFor func(org string) string) string { if len(orgs) == 0 { return "[]\n" @@ -281,7 +284,6 @@ func renderRunners(orgs []string, secretNameFor func(org string) string) string var b strings.Builder for _, org := range orgs { fmt.Fprintf(&b, "- name: %s\n", org) - fmt.Fprintf(&b, " replicas: %d\n", DefaultReplicas) fmt.Fprintf(&b, " registrationTokenSecretName: %s\n", secretNameFor(org)) } return b.String() diff --git a/src/runner-org-sync/internal/reconcile/reconcile_test.go b/src/runner-org-sync/internal/reconcile/reconcile_test.go index e46e15125af..a5ef8e98176 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile_test.go +++ b/src/runner-org-sync/internal/reconcile/reconcile_test.go @@ -167,13 +167,10 @@ func TestRun_ColdStart(t *testing.T) { } wantBody := strings.Join([]string{ "- name: brg", - " replicas: 1", " registrationTokenSecretName: altinn-gitea-runner-brg-secret", "- name: dsb", - " replicas: 1", " registrationTokenSecretName: altinn-gitea-runner-dsb-secret", "- name: ttd", - " replicas: 1", " registrationTokenSecretName: altinn-gitea-runner-ttd-secret", "", }, "\n") From c45171c015bfa0c5f68879ea1a2a475f234bd5a8 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Tue, 12 May 2026 23:28:26 +0200 Subject: [PATCH 04/33] install keda in studio cluster --- infra/studio/syncroot/base/keda.yaml | 57 +++++++++++++++++++ infra/studio/syncroot/base/kustomization.yaml | 1 + 2 files changed, 58 insertions(+) create mode 100644 infra/studio/syncroot/base/keda.yaml diff --git a/infra/studio/syncroot/base/keda.yaml b/infra/studio/syncroot/base/keda.yaml new file mode 100644 index 00000000000..0f0e2a24966 --- /dev/null +++ b/infra/studio/syncroot/base/keda.yaml @@ -0,0 
+1,57 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: keda +--- +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: keda + namespace: keda +spec: + type: oci + interval: 1h + url: oci://ghcr.io/kedacore/charts +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: keda + namespace: keda +spec: + interval: 10m + timeout: 5m + releaseName: keda + targetNamespace: keda + install: + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + chart: + spec: + chart: keda + version: 2.19.0 + sourceRef: + kind: HelmRepository + name: keda + namespace: keda + values: + # KEDA's defaults are sensible. Keep this block intentionally small; + # any per-environment knobs (e.g. higher resource limits in prod) can + # be added later via Flux postBuild substitution or per-env overlays. + resources: + operator: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 1Gi + metricServer: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 1Gi diff --git a/infra/studio/syncroot/base/kustomization.yaml b/infra/studio/syncroot/base/kustomization.yaml index 9241bdd628f..df308c1fc6f 100644 --- a/infra/studio/syncroot/base/kustomization.yaml +++ b/infra/studio/syncroot/base/kustomization.yaml @@ -11,3 +11,4 @@ resources: - lhci.yaml - observability.yaml - otel-operator.yaml + - keda.yaml From 2a7323e8ad4532c37b78dca49588033c5775beb7 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 08:30:03 +0200 Subject: [PATCH 05/33] use job instead of deployment for runners --- .../templates/deployment.yaml | 157 ---------------- .../gitea-org-runner/templates/scaledjob.yaml | 172 ++++++++++++++++++ charts/gitea-org-runner/values.yaml | 91 +++++---- 3 files changed, 225 insertions(+), 195 deletions(-) delete mode 100644 charts/gitea-org-runner/templates/deployment.yaml create mode 100644 charts/gitea-org-runner/templates/scaledjob.yaml diff --git 
a/charts/gitea-org-runner/templates/deployment.yaml b/charts/gitea-org-runner/templates/deployment.yaml deleted file mode 100644 index d7523e93dbc..00000000000 --- a/charts/gitea-org-runner/templates/deployment.yaml +++ /dev/null @@ -1,157 +0,0 @@ -{{- range .Values.runners }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "gitea-org-runner.fullname" $ }}-{{ .name }} - labels: - {{- include "gitea-org-runner.labels" $ | nindent 4 }} - runner: {{ .name }} -spec: - replicas: {{ .replicas | default 1 }} - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - {{- include "gitea-org-runner.selectorLabels" $ | nindent 6 }} - runner: {{ .name }} - template: - metadata: - {{- with $.Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "gitea-org-runner.labels" $ | nindent 8 }} - runner: {{ .name }} - {{- with $.Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with $.Values.runtimeClassName }} - runtimeClassName: {{ . }} - {{- end }} - {{- with $.Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $.Values.podSecurityContext }} - securityContext: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- if $.Values.dockerInDocker.enabled }} - initContainers: - - name: dockerd - image: "{{ $.Values.dockerInDocker.image.repository }}:{{ $.Values.dockerInDocker.image.tag }}" - imagePullPolicy: {{ $.Values.dockerInDocker.image.pullPolicy }} - restartPolicy: Always - securityContext: - privileged: true - env: - - name: DOCKER_TLS_CERTDIR - value: "/certs" - command: ["sh", "-c"] - args: - - | - if [ "$(df -PT /var/lib/docker | awk 'NR==2 {print $2}')" = "virtiofs" ]; then - apk add --no-cache e2fsprogs && - truncate -s {{ $.Values.dockerInDocker.diskSize | default "15G" }} /tmp/docker-disk.img && - mkfs.ext4 -F /tmp/docker-disk.img && - mount /tmp/docker-disk.img /var/lib/docker - fi - exec dockerd-entrypoint.sh --host=tcp://0.0.0.0:2376 - startupProbe: - tcpSocket: - port: 2376 - periodSeconds: 2 - failureThreshold: 45 - volumeMounts: - - name: docker-certs - mountPath: /certs - {{- if $.Values.shareWorkspace.enabled }} - - name: act-cache - mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} - {{- end }} - {{- end }} - containers: - - name: {{ $.Chart.Name }} - {{- with $.Values.securityContext }} - securityContext: - {{- toYaml . 
| nindent 12 }} - {{- end }} - image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" - imagePullPolicy: {{ $.Values.image.pullPolicy }} - env: - {{- if $.Values.dockerInDocker.enabled }} - - name: DOCKER_HOST - value: "tcp://localhost:2376" - - name: DOCKER_TLS_VERIFY - value: "1" - - name: DOCKER_CERT_PATH - value: "/certs/client" - {{- end }} - {{- if .environmentVariables }} - {{- range $variable := .environmentVariables}} - - name: {{ $variable.name }} - {{- if $variable.value }} - value: {{ $variable.value | quote }} - {{- end }} - {{- if $variable.valueFrom }} - valueFrom: - secretKeyRef: - name: {{ $variable.valueFrom.secretKeyRef.name }} - key: {{ $variable.valueFrom.secretKeyRef.key }} - {{- end }} - {{- end }} - {{- end }} - - {{- with $.Values.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- if or $.Values.volumeMounts $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} - volumeMounts: - {{- with $.Values.volumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - {{- if $.Values.dockerInDocker.enabled }} - - name: docker-certs - mountPath: /certs - readOnly: true - {{- end }} - {{- if $.Values.shareWorkspace.enabled }} - - name: act-cache - mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} - {{- end }} - {{- end }} - {{- if or $.Values.volumes $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} - volumes: - {{- with $.Values.volumes }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if $.Values.dockerInDocker.enabled }} - - name: docker-certs - emptyDir: {} - {{- end }} - {{- if $.Values.shareWorkspace.enabled }} - - name: act-cache - emptyDir: - sizeLimit: {{ $.Values.shareWorkspace.sizeLimit | default "10Gi" }} - {{- end }} - {{- end }} - {{- with $.Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $.Values.affinity }} - affinity: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- with $.Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/charts/gitea-org-runner/templates/scaledjob.yaml b/charts/gitea-org-runner/templates/scaledjob.yaml new file mode 100644 index 00000000000..67920f3393c --- /dev/null +++ b/charts/gitea-org-runner/templates/scaledjob.yaml @@ -0,0 +1,172 @@ +{{- range .Values.runners }} +--- +apiVersion: keda.sh/v1alpha1 +kind: ScaledJob +metadata: + name: {{ include "gitea-org-runner.fullname" $ }}-{{ .name }} + labels: + {{- include "gitea-org-runner.labels" $ | nindent 4 }} + runner: {{ .name }} +spec: + # Per-org concurrency cap: take the override if defined, otherwise the chart-wide default. + # KEDA creates at most this many concurrent Jobs from the template below. + maxReplicaCount: {{ index $.Values.maxConcurrentOverrides .name | default $.Values.maxConcurrentDefault }} + minReplicaCount: 0 + pollingInterval: {{ $.Values.keda.pollingInterval | default 30 }} + successfulJobsHistoryLimit: {{ $.Values.keda.successfulJobsHistoryLimit | default 5 }} + failedJobsHistoryLimit: {{ $.Values.keda.failedJobsHistoryLimit | default 5 }} + jobTargetRef: + backoffLimit: 0 + ttlSecondsAfterFinished: {{ $.Values.keda.ttlSecondsAfterFinished | default 300 }} + template: + metadata: + labels: + {{- include "gitea-org-runner.labels" $ | nindent 10 }} + runner: {{ .name }} + {{- with $.Values.podLabels }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 10 }} + {{- end }} + spec: + {{- with $.Values.runtimeClassName }} + runtimeClassName: {{ . }} + {{- end }} + restartPolicy: Never + {{- with $.Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.podSecurityContext }} + securityContext: + {{- toYaml . 
| nindent 10 }} + {{- end }} + {{- if $.Values.dockerInDocker.enabled }} + initContainers: + - name: dockerd + image: "{{ $.Values.dockerInDocker.image.repository }}:{{ $.Values.dockerInDocker.image.tag }}" + imagePullPolicy: {{ $.Values.dockerInDocker.image.pullPolicy }} + restartPolicy: Always + securityContext: + privileged: true + env: + - name: DOCKER_TLS_CERTDIR + value: "/certs" + command: ["sh", "-c"] + args: + - | + if [ "$(df -PT /var/lib/docker | awk 'NR==2 {print $2}')" = "virtiofs" ]; then + apk add --no-cache e2fsprogs && + truncate -s {{ $.Values.dockerInDocker.diskSize | default "15G" }} /tmp/docker-disk.img && + mkfs.ext4 -F /tmp/docker-disk.img && + mount /tmp/docker-disk.img /var/lib/docker + fi + exec dockerd-entrypoint.sh --host=tcp://0.0.0.0:2376 + startupProbe: + tcpSocket: + port: 2376 + periodSeconds: 2 + failureThreshold: 45 + volumeMounts: + - name: docker-certs + mountPath: /certs + {{- if $.Values.shareWorkspace.enabled }} + - name: act-cache + mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} + {{- end }} + {{- end }} + containers: + - name: {{ $.Chart.Name }} + {{- with $.Values.securityContext }} + securityContext: + {{- toYaml . | nindent 14 }} + {{- end }} + image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" + imagePullPolicy: {{ $.Values.image.pullPolicy }} + env: + {{- if $.Values.dockerInDocker.enabled }} + - name: DOCKER_HOST + value: "tcp://localhost:2376" + - name: DOCKER_TLS_VERIFY + value: "1" + - name: DOCKER_CERT_PATH + value: "/certs/client" + {{- end }} + # Runner registration + behaviour. Constructed from chart-level config + # (giteaInstanceUrl, runnerLabels) and the per-org Secret produced by + # runner-org-sync — no per-runner environmentVariables block required. 
+ - name: GITEA_INSTANCE_URL + value: {{ $.Values.gitea.instanceUrl | quote }} + - name: GITEA_RUNNER_EPHEMERAL + value: "1" + - name: GITEA_RUNNER_LABELS + value: {{ $.Values.gitea.runnerLabels | quote }} + - name: GITEA_RUNNER_REGISTRATION_TOKEN + valueFrom: + secretKeyRef: + name: {{ .registrationTokenSecretName }} + key: token + {{- with $.Values.extraEnv }} + {{- toYaml . | nindent 14 }} + {{- end }} + {{- with $.Values.resources }} + resources: + {{- toYaml . | nindent 14 }} + {{- end }} + {{- if or $.Values.volumeMounts $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} + volumeMounts: + {{- with $.Values.volumeMounts }} + {{- toYaml . | nindent 14 }} + {{- end }} + {{- if $.Values.dockerInDocker.enabled }} + - name: docker-certs + mountPath: /certs + readOnly: true + {{- end }} + {{- if $.Values.shareWorkspace.enabled }} + - name: act-cache + mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} + {{- end }} + {{- end }} + {{- if or $.Values.volumes $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} + volumes: + {{- with $.Values.volumes }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- if $.Values.dockerInDocker.enabled }} + - name: docker-certs + emptyDir: {} + {{- end }} + {{- if $.Values.shareWorkspace.enabled }} + - name: act-cache + emptyDir: + sizeLimit: {{ $.Values.shareWorkspace.sizeLimit | default "10Gi" }} + {{- end }} + {{- end }} + {{- with $.Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.affinity }} + affinity: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.tolerations }} + tolerations: + {{- toYaml . | nindent 10 }} + {{- end }} + triggers: + - type: github-runner + metadata: + # github-runner scaler is GitHub-compatible; pointed at Gitea's API URL + # it polls the org's Actions queue depth via the same shape. 
+ githubAPIURL: {{ $.Values.gitea.apiUrl | quote }} + owner: {{ .name | quote }} + runnerScope: "org" + labels: {{ $.Values.gitea.runnerLabels | quote }} + targetWorkflowQueueLength: {{ $.Values.keda.targetWorkflowQueueLength | default "1" | quote }} + authenticationRef: + name: {{ $.Values.keda.authenticationRef.name }} +{{- end }} diff --git a/charts/gitea-org-runner/values.yaml b/charts/gitea-org-runner/values.yaml index 6f178abbdda..32eb24ec72b 100644 --- a/charts/gitea-org-runner/values.yaml +++ b/charts/gitea-org-runner/values.yaml @@ -56,32 +56,58 @@ shareWorkspace: path: "/root/.cache/act" sizeLimit: "10Gi" -# List of runners to deploy. Each runner will create a separate deployment. +# Gitea connection details. Used both for the runner registration env vars +# inside each Job pod, and for the KEDA scaler's queue-depth polling URL. +gitea: + # Base URL of the Gitea instance (no /api/v1 suffix). Injected as + # GITEA_INSTANCE_URL on the runner container so act_runner registers here. + instanceUrl: "http://altinn-repositories-public.default.svc.cluster.local" + # Full API URL including /api/v1. Consumed by the KEDA github-runner scaler + # which polls the Actions queue. Gitea's Actions API is GitHub-compatible. + apiUrl: "http://altinn-repositories-public.default.svc.cluster.local/api/v1" + # Labels advertised by the runner; must match the labels referenced in + # workflow `runs-on:` entries (and the scaler's `labels` filter). + runnerLabels: "ubuntu-latest:host" + +# KEDA / ScaledJob configuration. Per-org caps live below in maxConcurrent*. +keda: + # Reference to a TriggerAuthentication CRD in the same namespace as the + # ScaledJob, holding the read-only Gitea PAT the scaler uses to poll the + # queue. Created out-of-band (one per cluster), not by this chart. + authenticationRef: + name: keda-gitea-auth + # How often KEDA polls Gitea per ScaledJob. 30s is the KEDA default; raise + # if Gitea complains about request rate (1 req per org per pollingInterval). 
+ pollingInterval: 30 + # Kubernetes deletes each finished Job this many seconds after it completes. + ttlSecondsAfterFinished: 300 + # KEDA keeps the last N successful / failed Jobs per ScaledJob. + successfulJobsHistoryLimit: 5 + failedJobsHistoryLimit: 5 + # How many queued workflows each runner is expected to drain. With "1", + # KEDA wants one new runner per queued workflow (subject to maxReplicaCount). + targetWorkflowQueueLength: "1" + +# Per-org concurrency caps. The chart applies maxConcurrentDefault to every +# org unless overridden by an explicit entry in maxConcurrentOverrides. +# maxConcurrentOverrides is keyed by org code (matching runners[].name). +maxConcurrentDefault: 2 +maxConcurrentOverrides: {} + # brg: 6 + # ssb: 5 + +# List of orgs to render a ScaledJob for. Populated by the consumer (typically +# the gitea-org-runner-config wrapper chart via Flux valuesFrom from the +# runner-org-list ConfigMap that runner-org-sync writes). +# +# Shape: [{ name, registrationTokenSecretName }]. The chart constructs all +# runner env vars (GITEA_INSTANCE_URL, GITEA_RUNNER_LABELS, etc.) from +# chart-level config; no per-runner environmentVariables block is needed. runners: [] # - name: ttd - # replicas: 5 - # environmentVariables: - # - name: GITEA_INSTANCE_URL - # value: "https://gitea.example.com" - # - name: GITEA_RUNNER_REGISTRATION_TOKEN - # valueFrom: - # secretKeyRef: - # name: gitea-runner-ttd-token - # key: token - # - name: GITEA_RUNNER_NAME - # value: "ttd-runner" + # registrationTokenSecretName: altinn-gitea-runner-ttd-secret # - name: brg - # replicas: 3 - # environmentVariables: - # - name: GITEA_INSTANCE_URL - # value: "https://gitea.example.com" - # - name: GITEA_RUNNER_REGISTRATION_TOKEN - # valueFrom: - # secretKeyRef: - # name: gitea-runner-brg-token - # key: token - # - name: GITEA_RUNNER_NAME - # value: "brg-runner" + # registrationTokenSecretName: altinn-gitea-runner-brg-secret # This is for setting Kubernetes Annotations to a Pod.
# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ @@ -95,7 +121,6 @@ podSecurityContext: # securityContext: # privileged: false - # capabilities: # drop: # - ALL @@ -103,12 +128,7 @@ podSecurityContext: # runAsNonRoot: true # runAsUser: 1000 - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. # limits: # cpu: 100m # memory: 128Mi @@ -116,16 +136,11 @@ resources: {} # cpu: 100m # memory: 128Mi -# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ -autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 100 - targetCPUUtilizationPercentage: 80 - # targetMemoryUtilizationPercentage: 80 +# Optional extra env vars merged onto the runner container. +extraEnv: [] + # - name: SOMETHING + # value: "..." 
nodeSelector: {} - tolerations: [] - affinity: {} From 3d8dbb11f8043362b1a06e8526809c8a230fcec0 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 08:53:37 +0200 Subject: [PATCH 06/33] manage keda secret --- .../cmd/runner-org-sync/main.go | 49 ++++++++- .../infra/kustomize/cronjob.yaml | 9 ++ src/runner-org-sync/infra/kustomize/role.yaml | 11 +- src/runner-org-sync/internal/config/config.go | 45 +++++++- .../internal/config/config_test.go | 54 +++++++-- .../internal/k8sstate/k8sstate.go | 56 ++++++++++ .../internal/k8sstate/k8sstate_test.go | 104 ++++++++++++++++++ .../internal/telemetry/metrics.go | 4 + 8 files changed, 316 insertions(+), 16 deletions(-) diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go index d7bf90e68a9..9cd46d72909 100644 --- a/src/runner-org-sync/cmd/runner-org-sync/main.go +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -78,7 +78,13 @@ func run() error { if err != nil { return fmt.Errorf("load PAT: %w", err) } - logger.Info("pat.loaded", "source", string(patSource), "len", len(pat)) + logger.Info("pat.loaded", "scope", "admin", "source", string(patSource), "len", len(pat)) + + kedaPAT, kedaPATSource, err := loadKedaPAT(ctx, cfg) + if err != nil { + return fmt.Errorf("load KEDA PAT: %w", err) + } + logger.Info("pat.loaded", "scope", "keda", "source", string(kedaPATSource), "len", len(kedaPAT)) k8sClient, err := buildK8sClient() if err != nil { @@ -139,6 +145,31 @@ func run() error { span.SetStatus(codes.Error, runErr.Error()) return runErr } + + // Project the KEDA read-only PAT into a K8s Secret. Independent of the + // per-org reconcile — runs even when the reconcile outcome is "partial" + // because the KEDA Secret has its own lifecycle. A failure here is + // non-fatal: log + metric, exit 0, retry next tick. 
+ kedaChanged, kedaErr := store.ApplyOpaqueSecret(ctx, + cfg.KedaPATSecretName, cfg.KedaPATSecretKey, kedaPAT) + metrics.KedaSecretApplied.Add(ctx, 1, metric.WithAttributes( + attribute.Bool("changed", kedaChanged), + attribute.Bool("success", kedaErr == nil), + )) + if kedaErr != nil { + logger.Warn("keda.secret.apply.failed", "err", kedaErr.Error(), "secret", cfg.KedaPATSecretName) + span.AddEvent("keda.secret.apply.failed", trace.WithAttributes( + attribute.String("secret", cfg.KedaPATSecretName), + attribute.String("err", kedaErr.Error()), + )) + } else { + span.AddEvent("keda.secret.applied", trace.WithAttributes( + attribute.String("secret", cfg.KedaPATSecretName), + attribute.Bool("changed", kedaChanged), + )) + logger.Info("keda.secret.applied", "secret", cfg.KedaPATSecretName, "changed", kedaChanged) + } + if report.Outcome == reconcile.OutcomePartial { // Continue-on-partial: still exit 0; metric + WARN log carries the signal. span.SetStatus(codes.Ok, "partial") @@ -148,6 +179,22 @@ func run() error { return nil } +// loadKedaPAT mirrors loadPAT for the read-only KEDA PAT. Env override wins; +// otherwise fetches from the same Key Vault used for the admin PAT, at a +// different secret name (KedaPATKeyVaultSecretName). +func loadKedaPAT(ctx context.Context, cfg config.Config) (string, keyvault.Source, error) { + var getter keyvault.Getter + if cfg.KedaPATOverride == "" { + g, err := keyvault.NewAzureGetter(cfg.KeyVaultName) + if err != nil { + return "", "", fmt.Errorf("build keyvault getter: %w", err) + } + getter = g + } + loader := keyvault.NewLoader(cfg.KedaPATOverride, getter, cfg.KedaPATKeyVaultSecretName) + return loader.Load(ctx) +} + // loadPAT resolves the Gitea admin PAT, honouring the env-var override for // local development. In-cluster it goes through Azure Key Vault using // Workload Identity via DefaultAzureCredential. 
diff --git a/src/runner-org-sync/infra/kustomize/cronjob.yaml b/src/runner-org-sync/infra/kustomize/cronjob.yaml index 08c9b3711ca..e0f6c35e7b0 100644 --- a/src/runner-org-sync/infra/kustomize/cronjob.yaml +++ b/src/runner-org-sync/infra/kustomize/cronjob.yaml @@ -74,6 +74,15 @@ spec: value: "${RUNNER_ORG_SYNC_KEYVAULT_NAME}" - name: RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME value: "gitea-admin-pat" + # KEDA PAT projection: read-only Gitea PAT fetched from the + # same Key Vault (different secret) and written as an Opaque + # K8s Secret for KEDA's TriggerAuthentication. + - name: RUNNER_ORG_SYNC_KEDA_PAT_KEYVAULT_SECRET_NAME + value: "gitea-keda-pat" + - name: RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME + value: "keda-gitea-pat" + - name: RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY + value: "token" - name: OTEL_SERVICE_NAME value: "runner-org-sync" - name: OTEL_EXPORTER_OTLP_ENDPOINT diff --git a/src/runner-org-sync/infra/kustomize/role.yaml b/src/runner-org-sync/infra/kustomize/role.yaml index f03fbdeb99f..9677f3d1e5f 100644 --- a/src/runner-org-sync/infra/kustomize/role.yaml +++ b/src/runner-org-sync/infra/kustomize/role.yaml @@ -3,12 +3,15 @@ kind: Role metadata: name: runner-org-sync rules: - # Per-org registration-token Secrets: list to inventory, get to check - # existence, create on onboarding, delete on offboarding. No "update" — - # we never modify a Secret in place; we delete and recreate. + # Per-org registration-token Secrets and the KEDA PAT Secret: + # - list to inventory managed registration Secrets, + # - get to check existence, + # - create on onboarding / first KEDA Secret write, + # - update for the KEDA Secret when its KV value rotates, + # - delete on offboarding. - apiGroups: [""] resources: ["secrets"] - verbs: ["get", "list", "create", "delete"] + verbs: ["get", "list", "create", "update", "delete"] # Runner-org-list ConfigMap: get to detect drift, create on first run, # update on subsequent changes. 
diff --git a/src/runner-org-sync/internal/config/config.go b/src/runner-org-sync/internal/config/config.go index caf59e457c0..32ad474f3bb 100644 --- a/src/runner-org-sync/internal/config/config.go +++ b/src/runner-org-sync/internal/config/config.go @@ -23,6 +23,14 @@ const ( EnvWhitelistedOrgs = "RUNNER_ORG_SYNC_ORGS" EnvGiteaPATOverride = "RUNNER_ORG_SYNC_GITEA_PAT" + // KEDA PAT projection: read-only Gitea PAT consumed by KEDA's + // github-runner scaler. Fetched from the same Key Vault as the admin + // PAT, written to a K8s Secret in OutputNamespace. + EnvKedaPATKeyVaultSecretName = "RUNNER_ORG_SYNC_KEDA_PAT_KEYVAULT_SECRET_NAME" + EnvKedaPATSecretName = "RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME" + EnvKedaPATSecretKey = "RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY" + EnvKedaPATOverride = "RUNNER_ORG_SYNC_KEDA_PAT" + // OrgPlaceholder is the substring SecretNamePattern must contain; // it is substituted with the org code at apply time. OrgPlaceholder = "{org}" @@ -40,6 +48,14 @@ type Config struct { SyncAll bool WhitelistedOrgs []string GiteaPATOverride string + + // KEDA PAT projection settings. KedaPATKeyVaultSecretName is the secret + // name in Azure Key Vault; KedaPATSecretName/Key control the destination + // K8s Secret. KedaPATOverride is a local-dev bypass mirroring GiteaPATOverride. 
+ KedaPATKeyVaultSecretName string + KedaPATSecretName string + KedaPATSecretKey string + KedaPATOverride string } // Getter abstracts os.Getenv so tests can inject a fake environment without @@ -65,6 +81,11 @@ func LoadFrom(get Getter) (Config, error) { SyncAll: parseBool(get(EnvSyncAll)), WhitelistedOrgs: parseCSV(get(EnvWhitelistedOrgs)), GiteaPATOverride: get(EnvGiteaPATOverride), + + KedaPATKeyVaultSecretName: strings.TrimSpace(get(EnvKedaPATKeyVaultSecretName)), + KedaPATSecretName: strings.TrimSpace(get(EnvKedaPATSecretName)), + KedaPATSecretKey: strings.TrimSpace(get(EnvKedaPATSecretKey)), + KedaPATOverride: get(EnvKedaPATOverride), } var errs []error @@ -73,17 +94,26 @@ func LoadFrom(get Getter) (Config, error) { requireField(&errs, EnvOutputNamespace, cfg.OutputNamespace) requireField(&errs, EnvSecretNamePattern, cfg.SecretNamePattern) requireField(&errs, EnvConfigMapName, cfg.ConfigMapName) + requireField(&errs, EnvKedaPATSecretName, cfg.KedaPATSecretName) + requireField(&errs, EnvKedaPATSecretKey, cfg.KedaPATSecretKey) if cfg.SecretNamePattern != "" && !strings.Contains(cfg.SecretNamePattern, OrgPlaceholder) { errs = append(errs, fmt.Errorf("%s must contain the %q placeholder", EnvSecretNamePattern, OrgPlaceholder)) } - // PAT must be reachable either via override (local dev) or via Key Vault (in-cluster). + // Admin PAT must be reachable either via override (local dev) or via Key Vault (in-cluster). if cfg.GiteaPATOverride == "" { requireField(&errs, EnvKeyVaultName, cfg.KeyVaultName) requireField(&errs, EnvKeyVaultSecretName, cfg.KeyVaultSecretName) } + // KEDA PAT has the same shape: override or KV-secret-name. KeyVaultName is + // shared with the admin PAT (one vault, multiple secrets) but is validated + // above only when the admin PAT is not overridden (GiteaPATOverride unset). + if cfg.KedaPATOverride == "" { + requireField(&errs, EnvKedaPATKeyVaultSecretName, cfg.KedaPATKeyVaultSecretName) + } + // Either syncAll=true or a non-empty whitelist.
An empty intersection is // almost certainly a misconfiguration, not an intended "sync nothing". if !cfg.SyncAll && len(cfg.WhitelistedOrgs) == 0 { @@ -101,8 +131,8 @@ func (c Config) SecretNameFor(org string) string { return strings.ReplaceAll(c.SecretNamePattern, OrgPlaceholder, org) } -// PATSource returns a short human-readable label describing where the PAT -// will be sourced from. Useful for the startup log line. +// PATSource returns a short human-readable label describing where the admin +// PAT will be sourced from. Useful for the startup log line. func (c Config) PATSource() string { if c.GiteaPATOverride != "" { return "env" @@ -110,6 +140,15 @@ func (c Config) PATSource() string { return "keyvault" } +// KedaPATSource returns where the KEDA PAT will be sourced from. Mirrors +// PATSource so the startup log makes both sources visible. +func (c Config) KedaPATSource() string { + if c.KedaPATOverride != "" { + return "env" + } + return "keyvault" +} + func requireField(errs *[]error, name, value string) { if value == "" { *errs = append(*errs, fmt.Errorf("%s is required", name)) diff --git a/src/runner-org-sync/internal/config/config_test.go b/src/runner-org-sync/internal/config/config_test.go index 205e5b39549..59776b18f9b 100644 --- a/src/runner-org-sync/internal/config/config_test.go +++ b/src/runner-org-sync/internal/config/config_test.go @@ -9,14 +9,17 @@ import ( // Tests mutate a copy to exercise one validation branch at a time. 
func validEnv() map[string]string { return map[string]string{ - EnvGiteaURL: "http://gitea.local", - EnvOrgsJSONURL: "https://altinncdn.no/orgs/altinn-orgs.json", - EnvOutputNamespace: "studio-runners", - EnvSecretNamePattern: "altinn-gitea-runner-{org}-secret", - EnvConfigMapName: "runner-org-list", - EnvKeyVaultName: "kv-studio", - EnvKeyVaultSecretName: "gitea-admin-pat", - EnvWhitelistedOrgs: "ttd,brg,dsb", + EnvGiteaURL: "http://gitea.local", + EnvOrgsJSONURL: "https://altinncdn.no/orgs/altinn-orgs.json", + EnvOutputNamespace: "studio-runners", + EnvSecretNamePattern: "altinn-gitea-runner-{org}-secret", + EnvConfigMapName: "runner-org-list", + EnvKeyVaultName: "kv-studio", + EnvKeyVaultSecretName: "gitea-admin-pat", + EnvWhitelistedOrgs: "ttd,brg,dsb", + EnvKedaPATKeyVaultSecretName: "gitea-keda-pat", + EnvKedaPATSecretName: "keda-gitea-pat", + EnvKedaPATSecretKey: "token", } } @@ -58,6 +61,41 @@ func TestLoadFrom_PATOverrideRelaxesKeyVaultRequirement(t *testing.T) { } } +func TestLoadFrom_KedaPATOverrideRelaxesKVRequirement(t *testing.T) { + env := validEnv() + delete(env, EnvKedaPATKeyVaultSecretName) + env[EnvKedaPATOverride] = "keda-pat-xyz" + + cfg, err := LoadFrom(getter(env)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.KedaPATSource() != "env" { + t.Errorf("KedaPATSource = %q, want env", cfg.KedaPATSource()) + } + if cfg.PATSource() != "keyvault" { + t.Errorf("PATSource = %q, want keyvault (admin still goes to KV)", cfg.PATSource()) + } +} + +func TestLoadFrom_KedaPATFieldsRequired(t *testing.T) { + env := validEnv() + delete(env, EnvKedaPATSecretName) + delete(env, EnvKedaPATSecretKey) + delete(env, EnvKedaPATKeyVaultSecretName) + + _, err := LoadFrom(getter(env)) + if err == nil { + t.Fatal("expected error, got nil") + } + msg := err.Error() + for _, want := range []string{EnvKedaPATSecretName, EnvKedaPATSecretKey, EnvKedaPATKeyVaultSecretName} { + if !strings.Contains(msg, want) { + t.Errorf("error does not mention %q; got 
%v", want, err) + } + } +} + func TestLoadFrom_RequiredFieldsAggregated(t *testing.T) { _, err := LoadFrom(getter(map[string]string{})) if err == nil { diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go index 140299c73b0..afa3028053e 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -9,6 +9,7 @@ package k8sstate import ( + "bytes" "context" "fmt" "maps" @@ -156,6 +157,61 @@ func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string return true, nil } +// ApplyOpaqueSecret creates or updates a single-key Opaque Secret so its +// data[key] equals value. Returns true if a write occurred. Used for the +// KEDA PAT projection: the value originates from Key Vault, the Secret is +// consumed by KEDA's TriggerAuthentication. +// +// Labels are applied on create (ManagedBy). On update, the managed-by label +// is added if missing; other existing labels are preserved. 
+func (s *Store) ApplyOpaqueSecret(ctx context.Context, name, key, value string) (bool, error) { + if key == "" { + return false, fmt.Errorf("k8sstate: ApplyOpaqueSecret %s: key is required", name) + } + encoded := []byte(value) + + existing, err := s.client.CoreV1().Secrets(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + desired := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: s.namespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{key: encoded}, + } + if _, err := s.client.CoreV1().Secrets(s.namespace).Create(ctx, desired, metav1.CreateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: create opaque secret %s: %w", name, err) + } + return true, nil + } + if err != nil { + return false, fmt.Errorf("k8sstate: get opaque secret %s: %w", name, err) + } + + // Only writing the single key we manage; leave any other keys untouched. + if bytes.Equal(existing.Data[key], encoded) { + return false, nil + } + + if existing.Data == nil { + existing.Data = map[string][]byte{} + } + existing.Data[key] = encoded + if existing.Labels == nil { + existing.Labels = map[string]string{} + } + existing.Labels[LabelManagedBy] = ManagedBy + + if _, err := s.client.CoreV1().Secrets(s.namespace).Update(ctx, existing, metav1.UpdateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: update opaque secret %s: %w", name, err) + } + return true, nil +} + // OrgFromSecret extracts the org code from a managed Secret's label. Returns // the empty string if the label is missing — callers should treat that as // a foreign Secret and skip it. 
diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go index 4096aa221dd..53bd32bda40 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -208,3 +208,107 @@ func TestOrgFromSecret(t *testing.T) { t.Errorf("OrgFromSecret on unlabelled secret = %q, want empty", got) } } + +func TestApplyOpaqueSecret_CreatesWhenMissing(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + + changed, err := s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "pat-value") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (create)") + } + got, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "keda-gitea-pat", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.Type != corev1.SecretTypeOpaque { + t.Errorf("type = %v, want Opaque", got.Type) + } + if string(got.Data["token"]) != "pat-value" { + t.Errorf("data[token] = %q, want pat-value", string(got.Data["token"])) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by = %q, want %q", got.Labels[LabelManagedBy], ManagedBy) + } +} + +func TestApplyOpaqueSecret_NoOpOnSameValue(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "keda-gitea-pat", Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: ManagedBy}, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"token": []byte("pat-value")}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "pat-value") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if changed { + t.Error("changed = true, want false (no diff)") + } +} + +func TestApplyOpaqueSecret_UpdatesOnDifference(t *testing.T) { + c := 
fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "keda-gitea-pat", Namespace: testNamespace}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"token": []byte("old-pat")}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "new-pat") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (update)") + } + got, _ := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "keda-gitea-pat", metav1.GetOptions{}) + if string(got.Data["token"]) != "new-pat" { + t.Errorf("data[token] = %q, want new-pat", string(got.Data["token"])) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not added on update, got %v", got.Labels) + } +} + +func TestApplyOpaqueSecret_PreservesOtherKeys(t *testing.T) { + // Some other actor wrote an unrelated key into the Secret; we must not + // stomp on it when applying ours. This is defence in depth against an + // operator that manages multiple keys in one Opaque Secret. 
+ c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "shared", Namespace: testNamespace}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{ + "token": []byte("old-pat"), + "other": []byte("not-ours"), + }, + }) + s := NewStore(c, testNamespace) + + if _, err := s.ApplyOpaqueSecret(context.Background(), "shared", "token", "new-pat"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + got, _ := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "shared", metav1.GetOptions{}) + if string(got.Data["other"]) != "not-ours" { + t.Errorf("other key was overwritten: %q", string(got.Data["other"])) + } + if string(got.Data["token"]) != "new-pat" { + t.Errorf("token = %q, want new-pat", string(got.Data["token"])) + } +} + +func TestApplyOpaqueSecret_RejectsEmptyKey(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + if _, err := s.ApplyOpaqueSecret(context.Background(), "x", "", "v"); err == nil { + t.Fatal("expected error for empty key, got nil") + } +} diff --git a/src/runner-org-sync/internal/telemetry/metrics.go b/src/runner-org-sync/internal/telemetry/metrics.go index 409a8b36b62..a6e08037e7f 100644 --- a/src/runner-org-sync/internal/telemetry/metrics.go +++ b/src/runner-org-sync/internal/telemetry/metrics.go @@ -25,6 +25,7 @@ type Metrics struct { KeyVaultDuration metric.Float64Histogram CDNCallDuration metric.Float64Histogram ConfigMapApplied metric.Int64Counter + KedaSecretApplied metric.Int64Counter } // NewMetrics constructs every instrument from the supplied Meter. 
Returns @@ -96,6 +97,9 @@ func NewMetrics(m metric.Meter) (*Metrics, error) { if err := mc(&out.ConfigMapApplied, "runner_org_sync.configmap.applied", "ConfigMap apply attempts by changed=true|false."); err != nil { return nil, err } + if err := mc(&out.KedaSecretApplied, "runner_org_sync.keda_secret.applied", "KEDA PAT Secret apply attempts by changed=true|false and success=true|false."); err != nil { + return nil, err + } return out, nil } From 65d0ce90ce8fba7dee9fb26eec8c983957fb99e2 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 09:01:37 +0200 Subject: [PATCH 07/33] fix the readme --- src/runner-org-sync/README.md | 226 +++++++++++++++++----------------- 1 file changed, 116 insertions(+), 110 deletions(-) diff --git a/src/runner-org-sync/README.md b/src/runner-org-sync/README.md index 5ee34ce7bcf..a886bbefed0 100644 --- a/src/runner-org-sync/README.md +++ b/src/runner-org-sync/README.md @@ -6,22 +6,29 @@ runners running in the Studio cluster. ## What it does -Every 15 minutes it: - -1. Loads the Gitea admin PAT from Azure Key Vault (via Workload Identity), or - from a local env var override for development. -2. Fetches `altinn-orgs.json` from `https://altinncdn.no/orgs/altinn-orgs.json`. -3. Filters orgs to those with at least one declared `environments` entry, +Each scheduled run (cadence configured by `spec.schedule` in +`infra/kustomize/cronjob.yaml`): + +1. Loads the **admin** Gitea PAT from Azure Key Vault (via Workload Identity), + or from a local env var override for development. +2. Loads the **read-only** Gitea PAT from the same Key Vault (different + secret name). This is the PAT KEDA's `github-runner` scaler will use. +3. Fetches `altinn-orgs.json` from `https://altinncdn.no/orgs/altinn-orgs.json`. +4. Filters orgs to those with at least one declared `environments` entry, then intersects with a whitelist supplied via env var. -4. For each org in the desired set: +5. 
For each org in the desired set: - if a `Secret altinn-gitea-runner-<org>-secret` already exists, leaves it alone — registered tokens are preserved across reconciles, - otherwise mints a fresh registration token via Gitea's admin API and creates the Secret. -5. Deletes Secrets for orgs that are no longer in the desired set. -6. Writes a single `ConfigMap/runner-org-list` projecting the desired set; +6. Deletes Secrets for orgs that are no longer in the desired set. +7. Writes a single `ConfigMap/runner-org-list` projecting the desired set; the `gitea-org-runner-config` HelmRelease picks this up via Flux - `valuesFrom` and renders one runner Deployment per entry. + `valuesFrom` and renders one KEDA `ScaledJob` per entry. +8. Projects the read-only PAT into an Opaque `Secret/keda-gitea-pat` (or + the name configured via env). KEDA's `TriggerAuthentication` references + this Secret. The Secret is created on first run; on subsequent runs it + is updated only when the KV value has changed. Continue-on-partial-failure: a single org failing to mint does not abort the run. Failed orgs are simply omitted from this tick's ConfigMap and retried @@ -35,38 +42,42 @@ rather than CronJob exit codes. │ │ Workload Identity ▼ - altinncdn.no ──► runner-org-sync (CronJob /15min) + altinncdn.no ──► runner-org-sync (CronJob) altinn-orgs.json │ │ filter: environments != ∅ ∧ whitelist ▼ - ┌───────────────────────────────────────────┐ - │ studio-runners ns │ - │ │ - │ per-org Secrets ConfigMap │ - │ ┌──────────────┐ ┌─────────────────┐ │ - │ │ ttd-secret │ │ runner-org-list │ │ - │ │ brg-secret │ │ - ttd │ │ - │ │ dsb-secret │ │ - brg │ │ - │ │ ...
│ │ - dsb │ │ - │ └──────┬───────┘ └────────┬────────┘ │ - │ │ │ │ - └──────────┼───────────────────┼────────────┘ - │ │ valuesFrom - │ ▼ - │ ┌───────────────────────────────┐ - │ │ gitea-org-runner-config │ - │ │ HelmRelease (Flux) │ - │ │ │ - │ │ renders one Deployment │ - │ │ per org-in-ConfigMap │ - │ └────────────┬──────────────────┘ - │ │ - │ secretKeyRef │ - ▼ ▼ - ┌───────────────────────────────────────────┐ - │ Gitea Actions runner Deployments │ - │ one per org, each ephemeral │ - └───────────────────────────────────────────┘ + ┌─────────────────────────────────────────────────────┐ + │ studio-runners ns │ + │ │ + │ per-org Secrets ConfigMap KEDA Secret │ + │ ┌──────────────┐ ┌─────────────────┐ ┌─────────┐ │ + │ │ ttd-secret │ │ runner-org-list │ │ keda- │ │ + │ │ brg-secret │ │ - ttd │ │ gitea- │ │ + │ │ dsb-secret │ │ - brg │ │ pat │ │ + │ │ ... │ │ - dsb │ │ │ │ + │ └──────┬───────┘ └────────┬────────┘ └────┬────┘ │ + │ │ │ │ │ + └──────────┼───────────────────┼───────────────┼──────┘ + │ │ valuesFrom │ + │ ▼ │ + │ ┌───────────────────────────┐ │ secret + │ │ gitea-org-runner-config │ │ TargetRef + │ │ HelmRelease (Flux) │ │ + │ │ renders one ScaledJob │ │ + │ │ per org-in-ConfigMap │ │ + │ └────────────┬──────────────┘ │ + │ │ ▼ + │ ▼ ┌──────────────────┐ + │ ┌──────────────┐ │ TriggerAuth │ + │ │ ScaledJob │◄─┤ keda-gitea-auth │ + │ │ (per org) │ └──────────────────┘ + │ └──────┬───────┘ + │ secretKeyRef │ KEDA creates Jobs on demand + ▼ ▼ + ┌──────────────────────────────────────────────────────┐ + │ Jobs (one per workflow; pod registers, runs, │ + │ exits; GC'd after ttlSecondsAfterFinished) │ + └──────────────────────────────────────────────────────┘ │ ▼ OTel collector @@ -74,40 +85,48 @@ rather than CronJob exit codes. 
otel-router.observability:4317) ``` -Two distinct credentials live in distinct stores: +Three distinct credentials, three storage strategies: -| Credential | Sensitivity | Storage | -| ------------------------------------------ | ------------------------- | ------------------------------------------------------------------------------------------------------------- | -| Gitea admin PAT (mints tokens for any org) | High | Azure Key Vault, fetched at pod start via Workload Identity. Never persisted in K8s. | -| Per-org runner registration token | Lower (scoped to one org) | K8s Secret `altinn-gitea-runner--secret`, key `token`. Consumed by runner Deployment via `secretKeyRef`. | +| Credential | Sensitivity | Storage | +| ------------------------------------------ | ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Gitea admin PAT (mints tokens for any org) | High | Azure Key Vault, fetched at pod start via Workload Identity. Never persisted in K8s. | +| Per-org runner registration token | Lower (scoped to one org) | K8s Secret `altinn-gitea-runner--secret`, key `token`. Minted by runner-org-sync on first appearance of the org, consumed by the runner Pod (created by KEDA's ScaledJob) via `secretKeyRef`. | +| Read-only Gitea PAT for KEDA scaler | Lower (read-only on orgs) | Azure Key Vault → projected to K8s Secret `keda-gitea-pat`, key `token`, by runner-org-sync each tick. Consumed by KEDA's `TriggerAuthentication`. Rotates when the KV value changes (≤ tick + 30s). | ## Configuration All settings come from environment variables. The loader fails fast at startup and aggregates every validation problem into one error. 
-| Variable | Required | Purpose | -| -------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------- | -| `RUNNER_ORG_SYNC_GITEA_URL` | yes | Base URL for Gitea admin API | -| `RUNNER_ORG_SYNC_ORGS_JSON_URL` | yes | URL of `altinn-orgs.json` | -| `RUNNER_ORG_SYNC_OUTPUT_NAMESPACE` | yes | Target namespace (e.g. `studio-runners`) | -| `RUNNER_ORG_SYNC_SECRET_NAME_PATTERN` | yes | Must contain the `{org}` placeholder, e.g. `altinn-gitea-runner-{org}-secret` | -| `RUNNER_ORG_SYNC_CONFIGMAP_NAME` | yes | e.g. `runner-org-list` | -| `RUNNER_ORG_SYNC_KEYVAULT_NAME` | if no env PAT | Azure Key Vault name | -| `RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME` | if no env PAT | Secret name inside the vault | -| `RUNNER_ORG_SYNC_SYNC_ALL` | no | `true` to skip the whitelist filter | -| `RUNNER_ORG_SYNC_ORGS` | if `SYNC_ALL=false` | CSV whitelist, e.g. `ttd,brg,dsb` | -| `RUNNER_ORG_SYNC_GITEA_PAT` | no | Local-dev bypass for Key Vault. Source is logged at startup. | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | no | OTel collector endpoint (defaults via SDK) | -| `OTEL_SERVICE_NAME` | no | Defaults to `runner-org-sync` | -| `OTEL_RESOURCE_ATTRIBUTES` | no | e.g. `deployment.environment=dev` | -| `AZURE_*` | injected | Workload Identity webhook fills `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_FEDERATED_TOKEN_FILE` | +| Variable | Required | Purpose | +| ----------------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------- | +| `RUNNER_ORG_SYNC_GITEA_URL` | yes | Base URL for Gitea admin API | +| `RUNNER_ORG_SYNC_ORGS_JSON_URL` | yes | URL of `altinn-orgs.json` | +| `RUNNER_ORG_SYNC_OUTPUT_NAMESPACE` | yes | Target namespace (e.g. `studio-runners`) | +| `RUNNER_ORG_SYNC_SECRET_NAME_PATTERN` | yes | Must contain the `{org}` placeholder, e.g. 
`altinn-gitea-runner-{org}-secret` | +| `RUNNER_ORG_SYNC_CONFIGMAP_NAME` | yes | e.g. `runner-org-list` | +| `RUNNER_ORG_SYNC_KEYVAULT_NAME` | if no env admin PAT | Azure Key Vault name (shared by both PATs) | +| `RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME` | if no env admin PAT | KV secret name holding the **admin** PAT | +| `RUNNER_ORG_SYNC_KEDA_PAT_KEYVAULT_SECRET_NAME` | if no env KEDA PAT | KV secret name holding the **read-only** PAT for KEDA | +| `RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME` | yes | Name of the K8s Secret to write the read-only PAT into (e.g. `keda-gitea-pat`) | +| `RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY` | yes | Data key inside that Secret (e.g. `token`) | +| `RUNNER_ORG_SYNC_SYNC_ALL` | no | `true` to skip the whitelist filter | +| `RUNNER_ORG_SYNC_ORGS` | if `SYNC_ALL=false` | CSV whitelist, e.g. `ttd,brg,dsb` | +| `RUNNER_ORG_SYNC_GITEA_PAT` | no | Local-dev bypass for admin PAT lookup. Source is logged at startup. | +| `RUNNER_ORG_SYNC_KEDA_PAT` | no | Local-dev bypass for KEDA PAT lookup. Source is logged at startup. | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | no | OTel collector endpoint (defaults via SDK) | +| `OTEL_SERVICE_NAME` | no | Defaults to `runner-org-sync` | +| `OTEL_RESOURCE_ATTRIBUTES` | no | e.g. 
`deployment.environment=dev` | +| `AZURE_*` | injected | Workload Identity webhook fills `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_FEDERATED_TOKEN_FILE` | ## Reconcile algorithm ``` +adminPAT = loadPAT(env override → else KV) +kedaPAT = loadPAT(env override → else KV) + desired = (orgs with non-empty environments) ∩ whitelist (or all if syncAll=true) -existing_secrets = Secrets matching SECRET_NAME_PATTERN +existing_secrets = managed Secrets in the output namespace for org in desired: if Secret exists for org: @@ -121,12 +140,17 @@ for secret in existing_secrets: delete Secret apply ConfigMap with one entry per (desired ∩ orgs whose Secret now exists) + +apply Opaque Secret with key=token, value=kedaPAT + (no-op if existing value matches; update otherwise) ``` -Existing Secrets are never re-minted; this preserves any in-flight runner -registrations and avoids churn on Deployments that already work. Deletions -remove only the K8s Secret; orphaned Gitea-side runner records are left to -go idle (cleanup is a separate concern). +Existing per-org Secrets are never re-minted; this preserves any in-flight +runner registrations and avoids churn on ScaledJobs that already work. The +KEDA Secret IS updated when the KV value changes, so KV rotation propagates +automatically within one tick + KEDA's polling interval. Deletions remove +only the K8s Secret; orphaned Gitea-side runner records are left to go idle +(cleanup is a separate concern). ## Observability @@ -145,35 +169,46 @@ work surfaces as span events on the parent span (`org.token.minted`, **the signal worth paging on if sustained non-zero** - `runner_org_sync.{gitea,cdn,keyvault}.call.duration` (histograms) -**Logs** — JSON via `slog`, ~5 lines per healthy run plus any WARNs: +**Logs** — JSON via `slog`, ~7 lines per healthy run plus any WARNs: ``` -reconcile.start run_id=... -pat.loaded source=keyvault len=40 -orgs.kept count=9 orgs=[ttd,brg,dsb,...] -org.reconcile.failed org=dsb stage=mint err=... 
(WARN, only on failure) -reconcile.end duration_ms=... outcome=success|partial|failure +reconcile.start run_id=... +pat.loaded scope=admin source=keyvault len=40 +pat.loaded scope=keda source=keyvault len=40 +orgs.kept count=9 orgs=[ttd,brg,dsb,...] +org.reconcile.failed org=dsb stage=mint err=... (WARN, only on failure) +keda.secret.applied secret=keda-gitea-pat changed=true|false +reconcile.end duration_ms=... outcome=success|partial|failure ``` +The `pat.loaded` lines surface the source per credential — accidental +fallback to env override in a non-local environment is immediately visible. +`keda.secret.applied changed=true` is the audit trail for a KV rotation +propagation. + ## Local development -The Gitea PAT can come from an env var instead of Key Vault, sidestepping -the need for Azure auth on a laptop: +Both PATs can come from env vars instead of Key Vault, sidestepping the +need for Azure auth on a laptop: ```sh -export RUNNER_ORG_SYNC_GITEA_PAT='your-local-or-test-pat' +export RUNNER_ORG_SYNC_GITEA_PAT='your-local-or-test-admin-pat' +export RUNNER_ORG_SYNC_KEDA_PAT='your-local-or-test-readonly-pat' export RUNNER_ORG_SYNC_GITEA_URL='http://localhost:3000' export RUNNER_ORG_SYNC_ORGS_JSON_URL='https://altinncdn.no/orgs/altinn-orgs.json' export RUNNER_ORG_SYNC_OUTPUT_NAMESPACE='studio-runners' export RUNNER_ORG_SYNC_SECRET_NAME_PATTERN='altinn-gitea-runner-{org}-secret' export RUNNER_ORG_SYNC_CONFIGMAP_NAME='runner-org-list' +export RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME='keda-gitea-pat' +export RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY='token' export RUNNER_ORG_SYNC_ORGS='ttd,brg' go run ./cmd/runner-org-sync ``` -The first log line will read `pat.loaded source=env`, making any accidental -fallback in a non-local environment immediately visible. +The startup log will read `pat.loaded scope=admin source=env` and +`pat.loaded scope=keda source=env`, making any accidental fallback in a +non-local environment immediately visible. 
## Testing @@ -186,37 +221,8 @@ make lint # golangci-lint Unit tests use stdlib `testing`, `net/http/httptest`, and `k8s.io/client-go/kubernetes/fake`. No testify, no other test frameworks. -Integration tests (kind-based) live under `test/integration/` and use stub -CDN + stub Gitea services in-cluster. They cover seven scenarios: - -1. Cold start -2. Idempotent re-run (no writes on unchanged input) -3. Org added -4. Org removed -5. Org with empty `environments` (filtered out) -6. Whitelist excludes -7. Gitea partial failure (one org fails, others succeed) - -Workload Identity is Azure-specific and is not covered by kind tests; -verify that path manually in a dev cluster. - -## Project layout - -``` -. -├── cmd/runner-org-sync/ entry point -├── internal/ -│ ├── config/ env-var loading + validation -│ ├── cdn/ altinn-orgs.json fetch + decode -│ ├── gitea/ registration-token mint client -│ ├── keyvault/ PAT loader (env override → Key Vault) -│ ├── k8sstate/ Secret + ConfigMap reconcile primitives -│ ├── reconcile/ pure orchestration -│ └── telemetry/ OTel + slog wiring -├── test/integration/ kind harness + scenarios -├── infra/kustomize/ Kubernetes manifests (Flux post-build substitution) -├── Dockerfile -├── Makefile -├── go.mod, go.sum -└── README.md -``` +End-to-end integration testing (kind-based, real Gitea + real KEDA, scenarios +TBD) is **in progress** — the approach is being designed alongside the +staging rollout rather than scaffolded up-front. Workload Identity is +Azure-specific and won't be covered by kind tests regardless; that path is +verified manually in a real cluster. 
From 0db8e572c273f9e1750775b9980ab428799ae748 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 09:04:57 +0200 Subject: [PATCH 08/33] trigger scaling --- src/runner-org-sync/README.md | 18 ++++++++++++++++++ .../infra/kustomize/kustomization.yaml | 1 + .../infra/kustomize/triggerauthentication.yaml | 17 +++++++++++++++++ 3 files changed, 36 insertions(+) create mode 100644 src/runner-org-sync/infra/kustomize/triggerauthentication.yaml diff --git a/src/runner-org-sync/README.md b/src/runner-org-sync/README.md index a886bbefed0..f8112ca51ce 100644 --- a/src/runner-org-sync/README.md +++ b/src/runner-org-sync/README.md @@ -93,6 +93,24 @@ Three distinct credentials, three storage strategies: | Per-org runner registration token | Lower (scoped to one org) | K8s Secret `altinn-gitea-runner--secret`, key `token`. Minted by runner-org-sync on first appearance of the org, consumed by the runner Pod (created by KEDA's ScaledJob) via `secretKeyRef`. | | Read-only Gitea PAT for KEDA scaler | Lower (read-only on orgs) | Azure Key Vault → projected to K8s Secret `keda-gitea-pat`, key `token`, by runner-org-sync each tick. Consumed by KEDA's `TriggerAuthentication`. Rotates when the KV value changes (≤ tick + 30s). | +### KEDA wiring + +The `TriggerAuthentication/keda-gitea-auth` lives in +`infra/kustomize/triggerauthentication.yaml` — ships with this service so +the Secret writer and the auth ref are deployed atomically. 
Three names +must agree across this folder and the workload chart: + +| Where | Field | Value | +| ------------------------------------- | ------------------------------------------------------ | -------------------------- | +| `cronjob.yaml` (env) | `RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME` / `_SECRET_KEY` | `keda-gitea-pat` / `token` | +| `triggerauthentication.yaml` | `secretTargetRef.name` / `.key` | `keda-gitea-pat` / `token` | +| `charts/gitea-org-runner/values.yaml` | `keda.authenticationRef.name` | `keda-gitea-auth` | + +The chart only consumes the TriggerAuth name as a reference; it does not +define the Secret or the TriggerAuth itself. Renaming any of the above +requires updating every place that uses it in the same change — the three rows above plus the TriggerAuthentication's own `metadata.name` — otherwise KEDA scalers +fail with `auth ref not found`. + ## Configuration All settings come from environment variables. The loader fails fast at diff --git a/src/runner-org-sync/infra/kustomize/kustomization.yaml b/src/runner-org-sync/infra/kustomize/kustomization.yaml index ca60aab1837..334c888836e 100644 --- a/src/runner-org-sync/infra/kustomize/kustomization.yaml +++ b/src/runner-org-sync/infra/kustomize/kustomization.yaml @@ -8,6 +8,7 @@ resources: - role.yaml - rolebinding.yaml - cronjob.yaml + - triggerauthentication.yaml # Copy the image annotation onto the container spec. The annotation value # is itself substituted at deploy time by Flux post-build substitution. diff --git a/src/runner-org-sync/infra/kustomize/triggerauthentication.yaml b/src/runner-org-sync/infra/kustomize/triggerauthentication.yaml new file mode 100644 index 00000000000..755cdddc24a --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/triggerauthentication.yaml @@ -0,0 +1,17 @@ +# KEDA TriggerAuthentication for the github-runner scaler.
+# +# The .name + .secretTargetRef[0].name + .secretTargetRef[0].key fields here +# are pinned to the same values runner-org-sync writes via env on its CronJob +# (RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME / _SECRET_KEY) and the same name the +# workload chart references (charts/gitea-org-runner values.yaml's +# keda.authenticationRef.name). If you rename any of them, change all four +# places at once or KEDA scalers will fail with "auth ref not found". +apiVersion: keda.sh/v1alpha1 +kind: TriggerAuthentication +metadata: + name: keda-gitea-auth +spec: + secretTargetRef: + - parameter: personalAccessToken + name: keda-gitea-pat + key: token From 7679ab7b95ddda6937bcf65337072a02a95106b5 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 09:09:02 +0200 Subject: [PATCH 09/33] add a deploy workflow --- .github/workflows/deploy-runner-org-sync.yaml | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .github/workflows/deploy-runner-org-sync.yaml diff --git a/.github/workflows/deploy-runner-org-sync.yaml b/.github/workflows/deploy-runner-org-sync.yaml new file mode 100644 index 00000000000..12a9b3700cd --- /dev/null +++ b/.github/workflows/deploy-runner-org-sync.yaml @@ -0,0 +1,111 @@ +name: Deploy Runner Org Sync +on: + push: + branches: [main] + paths: + - 'src/runner-org-sync/**' + - '.github/workflows/deploy-runner-org-sync.yaml' + workflow_dispatch: + inputs: + environments: + description: 'Environments to deploy to. Multiple environments can be specified by separating them with a comma.' 
+ required: false + default: 'dev' + +permissions: + id-token: write + contents: read + +jobs: + construct-environments-array: + uses: ./.github/workflows/template-studio-construct-environments.yaml + with: + inputs: ${{ toJSON(github.event.inputs) }} + + push-artifact: + name: Push runner-org-sync as OCI artifact + runs-on: ubuntu-latest + environment: dev + env: + REGISTRY_NAME: altinntjenestercontainerregistry + outputs: + CONFIG_REPO: ${{ steps.vars.outputs.config-repo }} + defaults: + run: + working-directory: src/runner-org-sync + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Set vars + id: vars + run: | + SHA="${GITHUB_SHA::10}" + echo "short-sha=$SHA" >> "$GITHUB_OUTPUT" + echo "image-repo=altinntjenestercontainerregistry.azurecr.io/altinn-studio/runner-org-sync:${SHA}" >> "$GITHUB_OUTPUT" + echo "config-repo=altinntjenestercontainerregistry.azurecr.io/altinn-studio/configs/runner-org-sync-repo:${SHA}" >> "$GITHUB_OUTPUT" + + - name: az login + uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID_FC }} + tenant-id: ${{ secrets.AZURE_TENANT_ID_FC }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID_FC }} + + - name: az acr login + run: az acr login --name ${{ env.REGISTRY_NAME }} + + - name: flux install + uses: fluxcd/flux2/action@bfa461ed2153ae5e0cca6bce08e0845268fb3088 # v2.8.2 + + - name: docker build + run: docker build -t ${{ steps.vars.outputs.image-repo }} -f Dockerfile . 
+ + - name: push image + run: docker push ${{ steps.vars.outputs.image-repo }} + + - name: patch base with image tag + working-directory: src/runner-org-sync/infra/kustomize + run: | + export IMAGE="${{ steps.vars.outputs.image-repo }}" + export IMAGE_TAG="${{ steps.vars.outputs.short-sha }}" + yq -i '.metadata.annotations["altinn.studio/image"] = env(IMAGE)' cronjob.yaml + yq -i '.metadata.annotations["altinn.studio/image-tag"] = env(IMAGE_TAG)' cronjob.yaml + + - name: push artifact + working-directory: src/runner-org-sync/infra/kustomize + run: | + flux push artifact oci://${{ steps.vars.outputs.config-repo }} \ + --provider=azure \ + --reproducible \ + --path="." \ + --source="$(git config --get remote.origin.url)" \ + --revision="$(git branch --show-current)/$(git rev-parse HEAD)" + + tag: + name: Tag artifact + needs: [push-artifact, construct-environments-array] + runs-on: ubuntu-latest + environment: ${{ matrix.environment }} + env: + REGISTRY_NAME: altinntjenestercontainerregistry + strategy: + matrix: + include: ${{ fromJSON(needs.construct-environments-array.outputs.result) }} + steps: + - name: az login + uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID_FC }} + tenant-id: ${{ secrets.AZURE_TENANT_ID_FC }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID_FC }} + + - name: az acr login + run: az acr login --name ${{ env.REGISTRY_NAME }} + + - name: flux install + uses: fluxcd/flux2/action@bfa461ed2153ae5e0cca6bce08e0845268fb3088 # v2.8.2 + + - name: tag artifact + run: | + flux tag artifact oci://${{ needs.push-artifact.outputs.CONFIG_REPO }} \ + --tag ${{ matrix.environment }} From f32d622532f70a2ac019b2be0161dee540be4b2f Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 09:23:16 +0200 Subject: [PATCH 10/33] wire kustomize files corerctly --- infra/studio/syncroot/base/kustomization.yaml | 1 + 
.../studio/syncroot/base/runner-org-sync.yaml | 53 +++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100644 infra/studio/syncroot/base/runner-org-sync.yaml diff --git a/infra/studio/syncroot/base/kustomization.yaml b/infra/studio/syncroot/base/kustomization.yaml index df308c1fc6f..ad4c5a1221c 100644 --- a/infra/studio/syncroot/base/kustomization.yaml +++ b/infra/studio/syncroot/base/kustomization.yaml @@ -12,3 +12,4 @@ resources: - observability.yaml - otel-operator.yaml - keda.yaml + - runner-org-sync.yaml diff --git a/infra/studio/syncroot/base/runner-org-sync.yaml b/infra/studio/syncroot/base/runner-org-sync.yaml new file mode 100644 index 00000000000..e7b449ebfdf --- /dev/null +++ b/infra/studio/syncroot/base/runner-org-sync.yaml @@ -0,0 +1,53 @@ +apiVersion: source.toolkit.fluxcd.io/v1 +kind: OCIRepository +metadata: + name: runner-org-sync + namespace: default +spec: + interval: 5m + # Single artifact, multi-env tagged. Mirrors the lhci pattern (vs. Designer + # which uses per-env artifact names). The deploy-runner-org-sync workflow + # in .github/workflows pushes one artifact per build SHA, then tags it + # with the environment name in the `tag` matrix job. + url: oci://altinntjenestercontainerregistry.azurecr.io/altinn-studio/configs/runner-org-sync-repo + ref: + tag: ${ENVIRONMENT} + provider: azure +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: runner-org-sync + namespace: default +spec: + interval: 5m + targetNamespace: studio-runners + sourceRef: + kind: OCIRepository + name: runner-org-sync + namespace: default + path: ./ + prune: true + timeout: 1m + postBuild: + # These variables are double-substituted: the outer syncroot Kustomization + # (provisioned in altinn-studio-infra/provisioning/studio-flux-syncroot.tf) + # resolves ${...} on the right-hand sides first, then this inner + # Kustomization applies the resolved values to the kustomize manifests + # pulled from the OCI artifact above. 
+ substitute: + ENVIRONMENT: ${ENVIRONMENT} + RUNNER_ORG_SYNC_ENTRA_CLIENT_ID: ${RUNNER_ORG_SYNC_ENTRA_CLIENT_ID} + # The kustomize manifests in src/runner-org-sync/infra/kustomize also + # reference these two; they must be sourced from somewhere before this + # Kustomization can reconcile cleanly. Three options: + # 1. Add to studio-flux-syncroot.tf's postBuild.substitute (alongside + # ENTRA_CLIENT_ID). Best for values Terraform already knows + # (KEYVAULT_NAME = azurerm_key_vault.kv.name). + # 2. Hardcode literal values here. Best for stable per-env values + # with no Terraform counterpart. + # 3. Move the runner-org-sync resource to per-env syncroot overlays + # (infra/studio/syncroot/{dev,staging,prod}/) so each env can + # patch its own values. + # RUNNER_ORG_SYNC_KEYVAULT_NAME: ??? + # RUNNER_ORG_SYNC_ORGS: ??? From 5d9656fbd6165db7896001266399f4a0290aa7af Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 15:16:40 +0200 Subject: [PATCH 11/33] fix chart --- .../templates/helm-release.yaml | 44 +++++++++++++------ charts/gitea-org-runner-config/values.yaml | 29 ++++++------ infra/studio/syncroot/dev/kustomization.yaml | 18 ++++++++ infra/studio/syncroot/prod/kustomization.yaml | 18 ++++++++ .../syncroot/staging/kustomization.yaml | 17 +++++++ 5 files changed, 99 insertions(+), 27 deletions(-) diff --git a/charts/gitea-org-runner-config/templates/helm-release.yaml b/charts/gitea-org-runner-config/templates/helm-release.yaml index 72991df60e5..844abb3f9ef 100644 --- a/charts/gitea-org-runner-config/templates/helm-release.yaml +++ b/charts/gitea-org-runner-config/templates/helm-release.yaml @@ -32,6 +32,28 @@ spec: enabled: {{ .Values.dockerInDocker.enabled }} shareWorkspace: enabled: {{ .Values.shareWorkspace.enabled }} + + # Chart-level Gitea config: the workload chart synthesises each runner + # pod's GITEA_INSTANCE_URL / RUNNER_LABELS env vars from here. 
The KEDA + # github-runner scaler uses apiUrl to poll org queue depth. + gitea: + instanceUrl: "{{ .Values.giteaInstanceUrl }}" + apiUrl: "{{ .Values.giteaInstanceUrl }}/api/v1" + runnerLabels: "ubuntu-latest:host" + + # KEDA wiring: name of the TriggerAuthentication ScaledJobs reference. + # The CR is materialised by runner-org-sync's deploy in the same + # namespace (src/runner-org-sync/infra/kustomize/triggerauthentication.yaml). + keda: + authenticationRef: + name: keda-gitea-auth + + # Per-org concurrency caps. Default applies to every org; overrides + # keyed by org name take precedence. + maxConcurrentDefault: {{ .Values.maxConcurrentDefault | default 2 }} + maxConcurrentOverrides: + {{- toYaml (.Values.maxConcurrentOverrides | default dict) | nindent 6 }} + resources: requests: cpu: 500m @@ -41,24 +63,18 @@ spec: cpu: "2" memory: 2Gi ephemeral-storage: 20Gi + + # Slim per-org list. The workload chart pairs each entry's `name` with + # chart-level gitea/keda config to render a ScaledJob, and mounts + # `registrationTokenSecretName` via secretKeyRef on the runner pod. + # No per-runner environmentVariables block needed; no `replicas` + # (KEDA owns scaling via maxConcurrent*). 
runners: - {{- $giteaUrl := .Values.giteaInstanceUrl }} {{- range index .Values.runners .Values.environment }} - name: {{ .name }} - replicas: {{ .replicas }} - environmentVariables: - - name: GITEA_INSTANCE_URL - value: "{{ $giteaUrl }}" - - name: GITEA_RUNNER_EPHEMERAL - value: "1" - - name: GITEA_RUNNER_LABELS - value: "ubuntu-latest:host" - - name: GITEA_RUNNER_REGISTRATION_TOKEN - valueFrom: - secretKeyRef: - name: {{ .registrationTokenSecretName }} - key: token + registrationTokenSecretName: {{ .registrationTokenSecretName }} {{- end }} + tolerations: - key: "purpose" operator: "Equal" diff --git a/charts/gitea-org-runner-config/values.yaml b/charts/gitea-org-runner-config/values.yaml index ab5765bf009..109632e7ecb 100644 --- a/charts/gitea-org-runner-config/values.yaml +++ b/charts/gitea-org-runner-config/values.yaml @@ -10,43 +10,46 @@ dockerInDocker: shareWorkspace: enabled: true -# Environment-specific runner configurations +# Per-org concurrency caps applied to KEDA ScaledJobs in the workload chart. +# `maxConcurrentDefault` applies to every org; `maxConcurrentOverrides` keys +# named exceptions per org. Overrides for orgs absent from the active +# environment's runners list are harmlessly ignored. +maxConcurrentDefault: 2 +maxConcurrentOverrides: + brg: 6 + ssb: 5 + +# Environment-specific runner lists. Entries are intentionally slim: +# the workload chart synthesises all per-runner env vars (GITEA_INSTANCE_URL, +# GITEA_RUNNER_LABELS, registration-token secretKeyRef, etc.) from chart-level +# config plus the org name; no per-runner environmentVariables block is needed. +# `replicas` is also gone — KEDA's ScaledJob owns scaling via maxConcurrent*. 
+# # dev/staging: only ttd -# prod: ttd, brg, dsb, ssb, ksdigi, pat, dibk, skm +# prod: ttd, brg, dsb, ssb, ksdigi, pat, dibk, skm, sfvt runners: dev: - name: ttd - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-ttd-secret staging: - name: ttd - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-ttd-secret prod: - name: ttd - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-ttd-secret - name: brg - replicas: 6 registrationTokenSecretName: altinn-gitea-runner-brg-secret - name: dsb - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-dsb-secret - name: ssb - replicas: 5 registrationTokenSecretName: altinn-gitea-runner-ssb-secret - name: ksdigi - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-ksdigi-secret - name: pat - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-pat-secret - name: dibk - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-dibk-secret - name: skm - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-skm-secret - name: sfvt - replicas: 1 registrationTokenSecretName: altinn-gitea-runner-sfvt-secret diff --git a/infra/studio/syncroot/dev/kustomization.yaml b/infra/studio/syncroot/dev/kustomization.yaml index 27bb5cb0b48..c1b3ee6e286 100644 --- a/infra/studio/syncroot/dev/kustomization.yaml +++ b/infra/studio/syncroot/dev/kustomization.yaml @@ -2,3 +2,21 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ../base + +# Per-env substitutions for the runner-org-sync Flux Kustomization. KV name +# follows the cluster-wide convention altinn-studio-<env>-kv. ORGS lists the +# whitelist runner-org-sync uses to filter the CDN. Add an entry here when +# onboarding a new dev-tier org.
+patches: + - target: + group: kustomize.toolkit.fluxcd.io + version: v1 + kind: Kustomization + name: runner-org-sync + patch: |- + - op: add + path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_KEYVAULT_NAME + value: altinn-studio-dev-kv + - op: add + path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_ORGS + value: ttd diff --git a/infra/studio/syncroot/prod/kustomization.yaml b/infra/studio/syncroot/prod/kustomization.yaml index 222e156a829..cb1df0d95a9 100644 --- a/infra/studio/syncroot/prod/kustomization.yaml +++ b/infra/studio/syncroot/prod/kustomization.yaml @@ -17,3 +17,21 @@ patches: metadata: name: altinn-altinity-agents namespace: default + + # Per-env substitutions for the runner-org-sync Flux Kustomization. KV + # name follows the cluster-wide convention altinn-studio-<env>-kv. ORGS + # lists the whitelist runner-org-sync uses to filter the CDN; this list + # must agree with the prod entry of runners in + # charts/gitea-org-runner-config/values.yaml. + - target: + group: kustomize.toolkit.fluxcd.io + version: v1 + kind: Kustomization + name: runner-org-sync + patch: |- + - op: add + path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_KEYVAULT_NAME + value: altinn-studio-prod-kv + - op: add + path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_ORGS + value: ttd,brg,dsb,ssb,ksdigi,pat,dibk,skm,sfvt diff --git a/infra/studio/syncroot/staging/kustomization.yaml b/infra/studio/syncroot/staging/kustomization.yaml index 222e156a829..3edc82bf974 100644 --- a/infra/studio/syncroot/staging/kustomization.yaml +++ b/infra/studio/syncroot/staging/kustomization.yaml @@ -17,3 +17,20 @@ patches: metadata: name: altinn-altinity-agents namespace: default + + # Per-env substitutions for the runner-org-sync Flux Kustomization. KV + # name follows the cluster-wide convention altinn-studio-<env>-kv. ORGS + # lists the whitelist runner-org-sync uses to filter the CDN. Add an + # entry here when onboarding a new staging-tier org.
+ - target: + group: kustomize.toolkit.fluxcd.io + version: v1 + kind: Kustomization + name: runner-org-sync + patch: |- + - op: add + path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_KEYVAULT_NAME + value: altinn-studio-staging-kv + - op: add + path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_ORGS + value: ttd From 68ee589d5db23ca378256b148e27351d4a6acc5a Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 15:23:23 +0200 Subject: [PATCH 12/33] deploy sync to staging --- .github/workflows/deploy-runner-org-sync.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy-runner-org-sync.yaml b/.github/workflows/deploy-runner-org-sync.yaml index 12a9b3700cd..768a02f0cdd 100644 --- a/.github/workflows/deploy-runner-org-sync.yaml +++ b/.github/workflows/deploy-runner-org-sync.yaml @@ -1,7 +1,7 @@ name: Deploy Runner Org Sync on: push: - branches: [main] + branches: [main, feat/runners-autoscaling] paths: - 'src/runner-org-sync/**' - '.github/workflows/deploy-runner-org-sync.yaml' @@ -20,7 +20,11 @@ jobs: construct-environments-array: uses: ./.github/workflows/template-studio-construct-environments.yaml with: - inputs: ${{ toJSON(github.event.inputs) }} + # Push to the autoscaling feature branch deploys to staging only. + # Push to main and manual dispatches fall through to github.event.inputs + # (which is empty on push to main → template applies its own defaults, + # and on dispatch → the value the user typed in the form). 
+ inputs: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/feat/runners-autoscaling') && '{"environments":"staging"}' || toJSON(github.event.inputs) }} push-artifact: name: Push runner-org-sync as OCI artifact From 67697e714d6d8cb136442e706505553fea303d89 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 15:41:07 +0200 Subject: [PATCH 13/33] keda namespace fix --- infra/studio/syncroot/base/keda.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/infra/studio/syncroot/base/keda.yaml b/infra/studio/syncroot/base/keda.yaml index 0f0e2a24966..857114edde9 100644 --- a/infra/studio/syncroot/base/keda.yaml +++ b/infra/studio/syncroot/base/keda.yaml @@ -1,9 +1,4 @@ --- -apiVersion: v1 -kind: Namespace -metadata: - name: keda ---- apiVersion: source.toolkit.fluxcd.io/v1 kind: HelmRepository metadata: @@ -25,6 +20,11 @@ spec: releaseName: keda targetNamespace: keda install: + # The keda namespace is created by Helm at install time. This avoids + # the kustomize/Flux namespace transformation that would otherwise + # rename a sibling Namespace resource in the syncroot to "default" + # and conflict with any other Namespace resource at the same level. 
+ createNamespace: true remediation: retries: 5 upgrade: From 1d8cd4e8374830813cd12864ae1fb5a188a72fae Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 16:27:10 +0200 Subject: [PATCH 14/33] fix keda --- infra/studio/syncroot/base/keda.yaml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/infra/studio/syncroot/base/keda.yaml b/infra/studio/syncroot/base/keda.yaml index 857114edde9..f3495f2c3ad 100644 --- a/infra/studio/syncroot/base/keda.yaml +++ b/infra/studio/syncroot/base/keda.yaml @@ -1,9 +1,14 @@ --- +# HelmRepository + HelmRelease live in `default` (the outer syncroot's +# targetNamespace) because the keda namespace doesn't exist yet — the +# chart's install creates it via install.createNamespace below. The chart +# still installs INTO `keda` via spec.targetNamespace; only the Flux source +# and release plumbing live in default. apiVersion: source.toolkit.fluxcd.io/v1 kind: HelmRepository metadata: name: keda - namespace: keda + namespace: default spec: type: oci interval: 1h @@ -13,17 +18,15 @@ apiVersion: helm.toolkit.fluxcd.io/v2 kind: HelmRelease metadata: name: keda - namespace: keda + namespace: default spec: interval: 10m timeout: 5m releaseName: keda targetNamespace: keda install: - # The keda namespace is created by Helm at install time. This avoids - # the kustomize/Flux namespace transformation that would otherwise - # rename a sibling Namespace resource in the syncroot to "default" - # and conflict with any other Namespace resource at the same level. + # Helm creates the keda namespace at install time so the operator + # resources (Deployments, Services, ConfigMaps...) have somewhere to land. createNamespace: true remediation: retries: 5 @@ -37,7 +40,7 @@ spec: sourceRef: kind: HelmRepository name: keda - namespace: keda + namespace: default values: # KEDA's defaults are sensible. 
Keep this block intentionally small; # any per-environment knobs (e.g. higher resource limits in prod) can From 0a4d99ca9acf4f5fe9f13efed52fd5f091bbad33 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 16:44:04 +0200 Subject: [PATCH 15/33] switch to HTTPS chart repo, OCI path was 403 --- infra/studio/syncroot/base/keda.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/infra/studio/syncroot/base/keda.yaml b/infra/studio/syncroot/base/keda.yaml index f3495f2c3ad..9feba499df3 100644 --- a/infra/studio/syncroot/base/keda.yaml +++ b/infra/studio/syncroot/base/keda.yaml @@ -10,9 +10,11 @@ metadata: name: keda namespace: default spec: - type: oci + # KEDA's canonical Helm repository (the URL their docs link to). The OCI + # variant at oci://ghcr.io/kedacore/charts returns 403 on anonymous pulls, + # so we use HTTPS — same chart contents, no auth quirks. interval: 1h - url: oci://ghcr.io/kedacore/charts + url: https://kedacore.github.io/charts --- apiVersion: helm.toolkit.fluxcd.io/v2 kind: HelmRelease From 4cd97d75ce656679972daa06bfcee0ccc0b91c4d Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 17:37:08 +0200 Subject: [PATCH 16/33] fix githubAPIURL --- charts/gitea-org-runner/templates/scaledjob.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/charts/gitea-org-runner/templates/scaledjob.yaml b/charts/gitea-org-runner/templates/scaledjob.yaml index 67920f3393c..21e39522105 100644 --- a/charts/gitea-org-runner/templates/scaledjob.yaml +++ b/charts/gitea-org-runner/templates/scaledjob.yaml @@ -162,7 +162,10 @@ spec: metadata: # github-runner scaler is GitHub-compatible; pointed at Gitea's API URL # it polls the org's Actions queue depth via the same shape.
- githubAPIURL: {{ $.Values.gitea.apiUrl | quote }} + # NOTE: the metadata key is `githubApiURL` — lowercase 'g', capital + # 'A' in 'Api', all-caps 'URL'. Any other casing is silently ignored + # and the scaler falls back to https://api.github.com. + githubApiURL: {{ $.Values.gitea.apiUrl | quote }} owner: {{ .name | quote }} runnerScope: "org" labels: {{ $.Values.gitea.runnerLabels | quote }} From 73f156d5d310d70aa791c16f0e8f1652ae1a3598 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 17:57:28 +0200 Subject: [PATCH 17/33] match label host for keda --- .../templates/helm-release.yaml | 4 ++++ charts/gitea-org-runner/templates/scaledjob.yaml | 2 +- charts/gitea-org-runner/values.yaml | 12 ++++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/charts/gitea-org-runner-config/templates/helm-release.yaml b/charts/gitea-org-runner-config/templates/helm-release.yaml index 844abb3f9ef..c614889ca13 100644 --- a/charts/gitea-org-runner-config/templates/helm-release.yaml +++ b/charts/gitea-org-runner-config/templates/helm-release.yaml @@ -39,7 +39,11 @@ spec: gitea: instanceUrl: "{{ .Values.giteaInstanceUrl }}" apiUrl: "{{ .Values.giteaInstanceUrl }}/api/v1" + # The runner advertises `ubuntu-latest:host` to act_runner (`:host` + # selects run-on-host mode); the KEDA scaler filters by `ubuntu-latest` + # only, matching what workflows put in `runs-on:`. runnerLabels: "ubuntu-latest:host" + scalerLabels: "ubuntu-latest" # KEDA wiring: name of the TriggerAuthentication ScaledJobs reference. 
# The CR is materialised by runner-org-sync's deploy in the same diff --git a/charts/gitea-org-runner/templates/scaledjob.yaml b/charts/gitea-org-runner/templates/scaledjob.yaml index 21e39522105..7030693fa31 100644 --- a/charts/gitea-org-runner/templates/scaledjob.yaml +++ b/charts/gitea-org-runner/templates/scaledjob.yaml @@ -168,7 +168,7 @@ spec: githubApiURL: {{ $.Values.gitea.apiUrl | quote }} owner: {{ .name | quote }} runnerScope: "org" - labels: {{ $.Values.gitea.runnerLabels | quote }} + labels: {{ $.Values.gitea.scalerLabels | default "ubuntu-latest" | quote }} targetWorkflowQueueLength: {{ $.Values.keda.targetWorkflowQueueLength | default "1" | quote }} authenticationRef: name: {{ $.Values.keda.authenticationRef.name }} diff --git a/charts/gitea-org-runner/values.yaml b/charts/gitea-org-runner/values.yaml index 32eb24ec72b..c5544ecaf80 100644 --- a/charts/gitea-org-runner/values.yaml +++ b/charts/gitea-org-runner/values.yaml @@ -65,9 +65,17 @@ gitea: # Full API URL including /api/v1. Consumed by the KEDA github-runner scaler # which polls the Actions queue. Gitea's Actions API is GitHub-compatible. apiUrl: "http://altinn-repositories-public.default.svc.cluster.local/api/v1" - # Labels advertised by the runner; must match the labels referenced in - # workflow `runs-on:` entries (and the scaler's `labels` filter). + # Labels advertised by the runner via GITEA_RUNNER_LABELS env var. The + # `:host` suffix is act_runner's runner-type hint (run jobs natively in + # the pod instead of in docker); only the part BEFORE the colon is the + # label name workflows match against with `runs-on:`. runnerLabels: "ubuntu-latest:host" + # The label name (without runner-type suffix) the KEDA scaler filters + # workflow runs by. Must match the value workflows put in `runs-on:`. 
+ # Derived from runnerLabels by stripping the ":<type>" suffix; expose + # separately so a workflow expecting "ubuntu-latest" isn't filtered out + # by a scaler looking for the literal "ubuntu-latest:host". + scalerLabels: "ubuntu-latest" # KEDA / ScaledJob configuration. Per-org caps live below in maxConcurrent*. keda: From 66a5e76c0aebc581b40136cbdc89d086e6702bad Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 19:27:52 +0200 Subject: [PATCH 18/33] avoid idle runners after 30 minutes --- charts/gitea-org-runner/templates/scaledjob.yaml | 6 ++++++ charts/gitea-org-runner/values.yaml | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/charts/gitea-org-runner/templates/scaledjob.yaml b/charts/gitea-org-runner/templates/scaledjob.yaml index 7030693fa31..efb26b671ae 100644 --- a/charts/gitea-org-runner/templates/scaledjob.yaml +++ b/charts/gitea-org-runner/templates/scaledjob.yaml @@ -18,6 +18,12 @@ spec: jobTargetRef: backoffLimit: 0 ttlSecondsAfterFinished: {{ $.Values.keda.ttlSecondsAfterFinished | default 300 }} + # Cap on total Job runtime. Without this, a runner pod that registers + # but never gets dispatched a workflow polls Gitea forever (act_runner + # has no built-in idle timeout). Set generously enough that legitimate + # long workflows can finish, but short enough that orphaned idle + # runners get reaped. + activeDeadlineSeconds: {{ $.Values.keda.activeDeadlineSeconds | default 1800 }} template: metadata: labels: diff --git a/charts/gitea-org-runner/values.yaml b/charts/gitea-org-runner/values.yaml index c5544ecaf80..4abcbb74a53 100644 --- a/charts/gitea-org-runner/values.yaml +++ b/charts/gitea-org-runner/values.yaml @@ -89,6 +89,11 @@ keda: pollingInterval: 30 # KEDA cleans up completed Jobs older than this many seconds. ttlSecondsAfterFinished: 300 + # Hard cap on total Job runtime. Pod is killed if it exceeds this even + # while polling for work.
Set to a value larger than the longest legitimate + # workflow you expect. Default 1800 = 30 min; raise if workflows can run + # longer, lower if you want orphaned idle runners reaped faster. + activeDeadlineSeconds: 1800 # K8s keeps the last N successful / failed Job records visible. successfulJobsHistoryLimit: 5 failedJobsHistoryLimit: 5 From 34060291041b0e1d9383d628877e39aef0fa9f13 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 19:35:28 +0200 Subject: [PATCH 19/33] added egress from runner-sync to otel export --- .../infra/kustomize/kustomization.yaml | 1 + .../infra/kustomize/networkpolicy.yaml | 29 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 src/runner-org-sync/infra/kustomize/networkpolicy.yaml diff --git a/src/runner-org-sync/infra/kustomize/kustomization.yaml b/src/runner-org-sync/infra/kustomize/kustomization.yaml index 334c888836e..34b922e7c05 100644 --- a/src/runner-org-sync/infra/kustomize/kustomization.yaml +++ b/src/runner-org-sync/infra/kustomize/kustomization.yaml @@ -9,6 +9,7 @@ resources: - rolebinding.yaml - cronjob.yaml - triggerauthentication.yaml + - networkpolicy.yaml # Copy the image annotation onto the container spec. The annotation value # is itself substituted at deploy time by Flux post-build substitution. diff --git a/src/runner-org-sync/infra/kustomize/networkpolicy.yaml b/src/runner-org-sync/infra/kustomize/networkpolicy.yaml new file mode 100644 index 00000000000..dfad42b33ed --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/networkpolicy.yaml @@ -0,0 +1,29 @@ +# Egress allowance for the OTel collector in the `observability` namespace. +# +# The studio-runners namespace has a cluster-wide default-deny egress policy +# (managed in altinn-studio-infra/provisioning/studio-runners-infra.tf) which +# whitelists DNS + Gitea + external internet, but not observability. 
Without +# this additional rule, runner-org-sync's OTLP exporter times out at pod +# exit and the run logs a `telemetry shutdown returned error` WARN. +# +# NetworkPolicies are additive: this policy adds to the studio-runners base +# policy rather than replacing it. Scoped via `app: runner-org-sync` so only +# this service gets the extra egress — tenant runner pods stay locked down. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: runner-org-sync-otel-egress +spec: + podSelector: + matchLabels: + app: runner-org-sync + policyTypes: + - Egress + egress: + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: observability + ports: + - port: 4317 + protocol: TCP From 163f6b6132c66af0a8a19efaf5ac01056337daa1 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 21:28:46 +0200 Subject: [PATCH 20/33] get secret from kv refactor --- .../cmd/runner-org-sync/main.go | 39 +++++++------------ 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go index 9cd46d72909..5d5d0597026 100644 --- a/src/runner-org-sync/cmd/runner-org-sync/main.go +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -74,13 +74,13 @@ func run() error { runID := uuid.NewString() logger := tel.Logger.With("run_id", runID, "service", serviceName) - pat, patSource, err := loadPAT(ctx, cfg) + pat, patSource, err := loadSecretFromKV(ctx, cfg.GiteaPATOverride, cfg.KeyVaultName, cfg.KeyVaultSecretName) if err != nil { - return fmt.Errorf("load PAT: %w", err) + return fmt.Errorf("load admin PAT: %w", err) } logger.Info("pat.loaded", "scope", "admin", "source", string(patSource), "len", len(pat)) - kedaPAT, kedaPATSource, err := loadKedaPAT(ctx, cfg) + kedaPAT, kedaPATSource, err := loadSecretFromKV(ctx, cfg.KedaPATOverride, cfg.KeyVaultName, cfg.KedaPATKeyVaultSecretName) if err != nil { return 
fmt.Errorf("load KEDA PAT: %w", err) } @@ -179,35 +179,22 @@ func run() error { return nil } -// loadKedaPAT mirrors loadPAT for the read-only KEDA PAT. Env override wins; -// otherwise fetches from the same Key Vault used for the admin PAT, at a -// different secret name (KedaPATKeyVaultSecretName). -func loadKedaPAT(ctx context.Context, cfg config.Config) (string, keyvault.Source, error) { +// loadSecretFromKV resolves a single Key Vault secret, honouring an env-var +// override for local development. When override is non-empty it +// short-circuits without constructing the Azure SDK client; otherwise it +// fetches from KV via Workload Identity (DefaultAzureCredential). Generic +// over the value type — used today for the two Gitea PATs; any other +// KV-stored credential could reuse it. +func loadSecretFromKV(ctx context.Context, override, vaultName, vaultSecretName string) (string, keyvault.Source, error) { var getter keyvault.Getter - if cfg.KedaPATOverride == "" { - g, err := keyvault.NewAzureGetter(cfg.KeyVaultName) + if override == "" { + g, err := keyvault.NewAzureGetter(vaultName) if err != nil { return "", "", fmt.Errorf("build keyvault getter: %w", err) } getter = g } - loader := keyvault.NewLoader(cfg.KedaPATOverride, getter, cfg.KedaPATKeyVaultSecretName) - return loader.Load(ctx) -} - -// loadPAT resolves the Gitea admin PAT, honouring the env-var override for -// local development. In-cluster it goes through Azure Key Vault using -// Workload Identity via DefaultAzureCredential. 
-func loadPAT(ctx context.Context, cfg config.Config) (string, keyvault.Source, error) { - var getter keyvault.Getter - if cfg.GiteaPATOverride == "" { - g, err := keyvault.NewAzureGetter(cfg.KeyVaultName) - if err != nil { - return "", "", fmt.Errorf("build keyvault getter: %w", err) - } - getter = g - } - loader := keyvault.NewLoader(cfg.GiteaPATOverride, getter, cfg.KeyVaultSecretName) + loader := keyvault.NewLoader(override, getter, vaultSecretName) return loader.Load(ctx) } From 6e342d0afdb0e523af3386fc874ac5bb7f593e17 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 21:33:29 +0200 Subject: [PATCH 21/33] move keda secret apply --- .../cmd/runner-org-sync/main.go | 62 ++++++++++++------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go index 5d5d0597026..ae92e4b6daf 100644 --- a/src/runner-org-sync/cmd/runner-org-sync/main.go +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -146,29 +146,11 @@ func run() error { return runErr } - // Project the KEDA read-only PAT into a K8s Secret. Independent of the - // per-org reconcile — runs even when the reconcile outcome is "partial" - // because the KEDA Secret has its own lifecycle. A failure here is - // non-fatal: log + metric, exit 0, retry next tick. 
- kedaChanged, kedaErr := store.ApplyOpaqueSecret(ctx, - cfg.KedaPATSecretName, cfg.KedaPATSecretKey, kedaPAT) - metrics.KedaSecretApplied.Add(ctx, 1, metric.WithAttributes( - attribute.Bool("changed", kedaChanged), - attribute.Bool("success", kedaErr == nil), - )) - if kedaErr != nil { - logger.Warn("keda.secret.apply.failed", "err", kedaErr.Error(), "secret", cfg.KedaPATSecretName) - span.AddEvent("keda.secret.apply.failed", trace.WithAttributes( - attribute.String("secret", cfg.KedaPATSecretName), - attribute.String("err", kedaErr.Error()), - )) - } else { - span.AddEvent("keda.secret.applied", trace.WithAttributes( - attribute.String("secret", cfg.KedaPATSecretName), - attribute.Bool("changed", kedaChanged), - )) - logger.Info("keda.secret.applied", "secret", cfg.KedaPATSecretName, "changed", kedaChanged) - } + // Independent of the per-org reconcile — runs even when its outcome is + // "partial" because the KEDA Secret has its own lifecycle. Failure is + // non-fatal: logged + counted, but the CronJob still exits 0 so the + // next tick retries. + applyKedaSecret(ctx, store, cfg, kedaPAT, metrics, span, logger) if report.Outcome == reconcile.OutcomePartial { // Continue-on-partial: still exit 0; metric + WARN log carries the signal. @@ -198,6 +180,40 @@ func loadSecretFromKV(ctx context.Context, override, vaultName, vaultSecretName return loader.Load(ctx) } +// applyKedaSecret writes the KEDA read-only PAT into a single-key Opaque +// Secret in the output namespace and emits its own metric / span event / +// log line. Separated from the per-org reconcile because it has an +// independent lifecycle (sourced from KV, not from Gitea) and an +// independent failure model (non-fatal — next tick retries). 
+func applyKedaSecret( + ctx context.Context, + store *k8sstate.Store, + cfg config.Config, + value string, + metrics *telemetry.Metrics, + span trace.Span, + logger *slog.Logger, +) { + changed, err := store.ApplyOpaqueSecret(ctx, cfg.KedaPATSecretName, cfg.KedaPATSecretKey, value) + metrics.KedaSecretApplied.Add(ctx, 1, metric.WithAttributes( + attribute.Bool("changed", changed), + attribute.Bool("success", err == nil), + )) + if err != nil { + logger.Warn("keda.secret.apply.failed", "err", err.Error(), "secret", cfg.KedaPATSecretName) + span.AddEvent("keda.secret.apply.failed", trace.WithAttributes( + attribute.String("secret", cfg.KedaPATSecretName), + attribute.String("err", err.Error()), + )) + return + } + span.AddEvent("keda.secret.applied", trace.WithAttributes( + attribute.String("secret", cfg.KedaPATSecretName), + attribute.Bool("changed", changed), + )) + logger.Info("keda.secret.applied", "secret", cfg.KedaPATSecretName, "changed", changed) +} + // buildK8sClient returns a clientset that prefers in-cluster config and // falls back to a local kubeconfig (KUBECONFIG / $HOME/.kube/config) so a // developer can run the binary directly against a kind cluster. 
From 9c7cb70587ce94840393ae441bf495a6f1c53edd Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 21:43:19 +0200 Subject: [PATCH 22/33] refactor telemetry --- .../cmd/runner-org-sync/main.go | 12 +- .../internal/telemetry/metrics.go | 9 +- .../internal/telemetry/telemetry.go | 115 ++++++++++-------- 3 files changed, 72 insertions(+), 64 deletions(-) diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go index ae92e4b6daf..75795223672 100644 --- a/src/runner-org-sync/cmd/runner-org-sync/main.go +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -54,7 +54,7 @@ func run() error { return fmt.Errorf("config: %w", err) } - tel, shutdown, err := telemetry.Init(ctx, serviceName) + shutdown, err := telemetry.ConfigureOTel(ctx, serviceName) if err != nil { return fmt.Errorf("telemetry init: %w", err) } @@ -66,13 +66,13 @@ func run() error { } }() - metrics, err := telemetry.NewMetrics(tel.Meter) + metrics, err := telemetry.NewMetrics() if err != nil { return fmt.Errorf("telemetry metrics: %w", err) } runID := uuid.NewString() - logger := tel.Logger.With("run_id", runID, "service", serviceName) + logger := slog.With("run_id", runID, "service", serviceName) pat, patSource, err := loadSecretFromKV(ctx, cfg.GiteaPATOverride, cfg.KeyVaultName, cfg.KeyVaultSecretName) if err != nil { @@ -107,7 +107,7 @@ func run() error { return fmt.Errorf("build reconciler: %w", err) } - ctx, span := tel.Tracer.Start(ctx, "runner_org_sync.reconcile", + ctx, span := telemetry.Tracer().Start(ctx, "runner_org_sync.reconcile", trace.WithAttributes(attribute.String("run_id", runID)), ) defer span.End() @@ -150,7 +150,7 @@ func run() error { // "partial" because the KEDA Secret has its own lifecycle. Failure is // non-fatal: logged + counted, but the CronJob still exits 0 so the // next tick retries. 
- applyKedaSecret(ctx, store, cfg, kedaPAT, metrics, span, logger) + applyKedaSecret(ctx, store, cfg, kedaPAT, metrics, logger) if report.Outcome == reconcile.OutcomePartial { // Continue-on-partial: still exit 0; metric + WARN log carries the signal. @@ -191,9 +191,9 @@ func applyKedaSecret( cfg config.Config, value string, metrics *telemetry.Metrics, - span trace.Span, logger *slog.Logger, ) { + span := trace.SpanFromContext(ctx) changed, err := store.ApplyOpaqueSecret(ctx, cfg.KedaPATSecretName, cfg.KedaPATSecretKey, value) metrics.KedaSecretApplied.Add(ctx, 1, metric.WithAttributes( attribute.Bool("changed", changed), diff --git a/src/runner-org-sync/internal/telemetry/metrics.go b/src/runner-org-sync/internal/telemetry/metrics.go index a6e08037e7f..2d30e17b4f3 100644 --- a/src/runner-org-sync/internal/telemetry/metrics.go +++ b/src/runner-org-sync/internal/telemetry/metrics.go @@ -28,10 +28,11 @@ type Metrics struct { KedaSecretApplied metric.Int64Counter } -// NewMetrics constructs every instrument from the supplied Meter. Returns -// an error if any instrument cannot be created; in practice this only fires -// on misconfigured SDKs. -func NewMetrics(m metric.Meter) (*Metrics, error) { +// NewMetrics constructs every instrument from the package's Meter (set up +// by ConfigureOTel). Returns an error if any instrument cannot be created; +// in practice this only fires on misconfigured SDKs. 
+func NewMetrics() (*Metrics, error) { + m := Meter() mk := func(target *metric.Float64Histogram, name, desc, unit string) error { h, err := m.Float64Histogram(name, metric.WithDescription(desc), metric.WithUnit(unit)) if err != nil { diff --git a/src/runner-org-sync/internal/telemetry/telemetry.go b/src/runner-org-sync/internal/telemetry/telemetry.go index a58717fbbac..cf1b4ef196f 100644 --- a/src/runner-org-sync/internal/telemetry/telemetry.go +++ b/src/runner-org-sync/internal/telemetry/telemetry.go @@ -1,11 +1,11 @@ -// Package telemetry configures OpenTelemetry traces and metrics, plus a -// structured slog logger writing JSON to stdout. +// Package telemetry configures OpenTelemetry traces and metrics, sets the +// default slog logger, and exposes package-level Tracer/Meter accessors. // -// Init returns a Telemetry value carrying ready-to-use Logger, Tracer, and -// Meter, and a Shutdown closer that flushes both the trace and metric -// pipelines. When OTEL_EXPORTER_OTLP_ENDPOINT is unset the OTLP exporters -// are skipped entirely — the no-op providers from the OTel SDK keep working -// so callers do not need conditional code paths. +// Mirrors the pattern used by src/Runtime/pdf3/internal/telemetry: callers +// invoke ConfigureOTel once at startup, defer the returned shutdown, and use +// telemetry.Tracer() / telemetry.Meter() anywhere they need an instrument. +// No per-handle struct to thread through call sites — OTel's global +// providers do that work. 
package telemetry import ( @@ -20,66 +20,85 @@ import ( "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/sdk/resource" + "go.opentelemetry.io/otel/propagation" sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.26.0" "go.opentelemetry.io/otel/trace" ) -// Telemetry exposes the three observability handles the rest of the service -// uses. None of them require a non-nil OTLP endpoint to be safe to call. -type Telemetry struct { - Logger *slog.Logger - Tracer trace.Tracer - Meter metric.Meter -} +const scope = "altinn.studio/runner-org-sync" -// Shutdown flushes and stops the OTel pipelines. Always call on exit, with -// a short bounded context (10s is plenty). -type Shutdown func(ctx context.Context) error +// Tracer returns the package's tracer. Safe to call before ConfigureOTel — +// the OTel SDK's default global provider is a no-op until a real one is +// installed, so the returned tracer always works. +// +//nolint:ireturn // OpenTelemetry intentionally exposes interface-returning accessors. +func Tracer() trace.Tracer { + return otel.Tracer(scope) +} -// scope is the instrumentation scope name used for the tracer and meter. -const scope = "altinn.studio/runner-org-sync" +// Meter returns the package's meter. Same semantics as Tracer. +// +//nolint:ireturn // OpenTelemetry intentionally exposes interface-returning accessors. +func Meter() metric.Meter { + return otel.Meter(scope) +} -// Init configures providers and returns ready-to-use handles. serviceName -// defaults to "runner-org-sync" when empty and overrides any value the SDK -// would otherwise pick up from OTEL_SERVICE_NAME. 
-func Init(ctx context.Context, serviceName string) (*Telemetry, Shutdown, error) { +// ConfigureOTel bootstraps OpenTelemetry (traces + metrics) and sets the +// default slog logger. Always defer the returned shutdown on exit with a +// bounded context — 10s is plenty for our payload sizes. +// +// If OTEL_EXPORTER_OTLP_ENDPOINT is unset (typical for local dev) the OTLP +// exporters are skipped entirely and the global no-op providers continue to +// satisfy Tracer() / Meter() calls. +func ConfigureOTel(ctx context.Context, serviceName string) (func(context.Context) error, error) { if serviceName == "" { serviceName = "runner-org-sync" } - logger := slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + // Default slog handler: JSON to stdout. Keeps `kubectl logs` readable + // for humans and parseable for log aggregators. + slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ Level: slog.LevelInfo, - })) - slog.SetDefault(logger) + }))) res, err := resource.New(ctx, resource.WithAttributes(semconv.ServiceName(serviceName)), - resource.WithFromEnv(), // OTEL_RESOURCE_ATTRIBUTES + resource.WithFromEnv(), // OTEL_RESOURCE_ATTRIBUTES resource.WithProcessPID(), resource.WithHost(), ) if err != nil { - return nil, nil, fmt.Errorf("telemetry: resource: %w", err) + return nil, fmt.Errorf("telemetry: resource: %w", err) } - // If no OTLP endpoint is configured (typical for local dev) skip exporters - // entirely. The default global TracerProvider / MeterProvider are no-ops, - // so call sites do not need conditional logic. + // Set propagator so any future cross-service call (HTTP/gRPC) preserves + // trace context automatically. Free for us — costs nothing if unused. 
+ otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + noop := func(context.Context) error { return nil } if !otlpEndpointConfigured() { - t := &Telemetry{ - Logger: logger, - Tracer: otel.Tracer(scope), - Meter: otel.Meter(scope), + return noop, nil + } + + var shutdownFuncs []func(context.Context) error + shutdown := func(ctx context.Context) error { + var shutdownErr error + for _, fn := range shutdownFuncs { + shutdownErr = errors.Join(shutdownErr, fn(ctx)) } - return t, func(context.Context) error { return nil }, nil + shutdownFuncs = nil + return shutdownErr } traceExp, err := otlptracegrpc.New(ctx) if err != nil { - return nil, nil, fmt.Errorf("telemetry: trace exporter: %w", err) + return shutdown, fmt.Errorf("telemetry: trace exporter: %w", err) } tp := sdktrace.NewTracerProvider( sdktrace.WithBatcher(traceExp), @@ -87,13 +106,11 @@ func Init(ctx context.Context, serviceName string) (*Telemetry, Shutdown, error) sdktrace.WithSampler(sdktrace.AlwaysSample()), ) otel.SetTracerProvider(tp) + shutdownFuncs = append(shutdownFuncs, tp.Shutdown) metricExp, err := otlpmetricgrpc.New(ctx) if err != nil { - // Best-effort cleanup of the already-installed trace exporter so we - // do not leave background goroutines if Init returns an error. 
- _ = tp.Shutdown(ctx) - return nil, nil, fmt.Errorf("telemetry: metric exporter: %w", err) + return shutdown, fmt.Errorf("telemetry: metric exporter: %w", err) } mp := sdkmetric.NewMeterProvider( sdkmetric.WithResource(res), @@ -102,19 +119,9 @@ func Init(ctx context.Context, serviceName string) (*Telemetry, Shutdown, error) )), ) otel.SetMeterProvider(mp) + shutdownFuncs = append(shutdownFuncs, mp.Shutdown) - shutdown := func(ctx context.Context) error { - return errors.Join( - tp.Shutdown(ctx), - mp.Shutdown(ctx), - ) - } - - return &Telemetry{ - Logger: logger, - Tracer: otel.Tracer(scope), - Meter: otel.Meter(scope), - }, shutdown, nil + return shutdown, nil } func otlpEndpointConfigured() bool { From 6f0285b34dcc16743bad085a2250536864fd7090 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 21:53:28 +0200 Subject: [PATCH 23/33] drop dead helpers, fail-fast on unauthorized mint --- src/runner-org-sync/internal/cdn/cdn.go | 15 ------- src/runner-org-sync/internal/cdn/cdn_test.go | 24 ----------- src/runner-org-sync/internal/config/config.go | 18 --------- .../internal/config/config_test.go | 16 ++++---- .../internal/reconcile/reconcile.go | 11 ++++- .../internal/reconcile/reconcile_test.go | 40 +++++++++++++++++++ 6 files changed, 58 insertions(+), 66 deletions(-) diff --git a/src/runner-org-sync/internal/cdn/cdn.go b/src/runner-org-sync/internal/cdn/cdn.go index 58514e36f92..7cc2b22c3f2 100644 --- a/src/runner-org-sync/internal/cdn/cdn.go +++ b/src/runner-org-sync/internal/cdn/cdn.go @@ -34,21 +34,6 @@ type Org struct { Environments []string `json:"environments"` } -// DisplayName returns the most useful human-readable name available: -// English preferred, any language otherwise, falling back to the code. -// Used for span attributes and log fields, not for reconciliation logic.
-func (o Org) DisplayName() string { - if v, ok := o.Name["en"]; ok && v != "" { - return v - } - for _, v := range o.Name { - if v != "" { - return v - } - } - return o.Code -} - // Client fetches the orgs document. type Client struct { httpClient *http.Client diff --git a/src/runner-org-sync/internal/cdn/cdn_test.go b/src/runner-org-sync/internal/cdn/cdn_test.go index ca53e353ed6..16ff0909f9d 100644 --- a/src/runner-org-sync/internal/cdn/cdn_test.go +++ b/src/runner-org-sync/internal/cdn/cdn_test.go @@ -73,10 +73,6 @@ func TestFetch_Happy(t *testing.T) { if got, want := ttd.Environments, []string{"tt02", "production"}; !equalSlice(got, want) { t.Errorf("ttd.Environments = %v, want %v", got, want) } - if got, want := ttd.DisplayName(), "Test org TTD"; got != want { - t.Errorf("ttd.DisplayName = %q, want %q", got, want) - } - acn := byCode["acn"] if len(acn.Environments) != 0 { t.Errorf("acn.Environments = %v, want empty", acn.Environments) @@ -150,26 +146,6 @@ func TestFetch_ContextCancelled(t *testing.T) { } } -func TestDisplayName(t *testing.T) { - cases := []struct { - name string - org Org - want string - }{ - {"prefers en", Org{Code: "x", Name: map[string]string{"en": "English", "nb": "Norsk"}}, "English"}, - {"falls back to any language", Org{Code: "x", Name: map[string]string{"nb": "Norsk"}}, "Norsk"}, - {"empty en falls through", Org{Code: "x", Name: map[string]string{"en": "", "nn": "Nynorsk"}}, "Nynorsk"}, - {"no name uses code", Org{Code: "x"}, "x"}, - } - for _, c := range cases { - t.Run(c.name, func(t *testing.T) { - if got := c.org.DisplayName(); got != c.want { - t.Errorf("DisplayName = %q, want %q", got, c.want) - } - }) - } -} - func indexByCode(orgs []Org) map[string]Org { m := make(map[string]Org, len(orgs)) for _, o := range orgs { diff --git a/src/runner-org-sync/internal/config/config.go b/src/runner-org-sync/internal/config/config.go index 32ad474f3bb..2f4b6d5588a 100644 --- a/src/runner-org-sync/internal/config/config.go +++ 
b/src/runner-org-sync/internal/config/config.go @@ -131,24 +131,6 @@ func (c Config) SecretNameFor(org string) string { return strings.ReplaceAll(c.SecretNamePattern, OrgPlaceholder, org) } -// PATSource returns a short human-readable label describing where the admin -// PAT will be sourced from. Useful for the startup log line. -func (c Config) PATSource() string { - if c.GiteaPATOverride != "" { - return "env" - } - return "keyvault" -} - -// KedaPATSource returns where the KEDA PAT will be sourced from. Mirrors -// PATSource so the startup log makes both sources visible. -func (c Config) KedaPATSource() string { - if c.KedaPATOverride != "" { - return "env" - } - return "keyvault" -} - func requireField(errs *[]error, name, value string) { if value == "" { *errs = append(*errs, fmt.Errorf("%s is required", name)) diff --git a/src/runner-org-sync/internal/config/config_test.go b/src/runner-org-sync/internal/config/config_test.go index 59776b18f9b..33fbb107bce 100644 --- a/src/runner-org-sync/internal/config/config_test.go +++ b/src/runner-org-sync/internal/config/config_test.go @@ -41,8 +41,8 @@ func TestLoadFrom_Valid(t *testing.T) { if cfg.SyncAll { t.Errorf("SyncAll = true, want false") } - if cfg.PATSource() != "keyvault" { - t.Errorf("PATSource = %q, want keyvault", cfg.PATSource()) + if cfg.GiteaPATOverride != "" { + t.Errorf("GiteaPATOverride = %q, want empty (no override in baseline env)", cfg.GiteaPATOverride) } } @@ -56,8 +56,8 @@ func TestLoadFrom_PATOverrideRelaxesKeyVaultRequirement(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if cfg.PATSource() != "env" { - t.Errorf("PATSource = %q, want env", cfg.PATSource()) + if cfg.GiteaPATOverride != "pat-xyz" { + t.Errorf("GiteaPATOverride = %q, want pat-xyz", cfg.GiteaPATOverride) } } @@ -70,11 +70,11 @@ func TestLoadFrom_KedaPATOverrideRelaxesKVRequirement(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if cfg.KedaPATSource() != "env" { - 
t.Errorf("KedaPATSource = %q, want env", cfg.KedaPATSource()) + if cfg.KedaPATOverride != "keda-pat-xyz" { + t.Errorf("KedaPATOverride = %q, want keda-pat-xyz", cfg.KedaPATOverride) } - if cfg.PATSource() != "keyvault" { - t.Errorf("PATSource = %q, want keyvault (admin still goes to KV)", cfg.PATSource()) + if cfg.GiteaPATOverride != "" { + t.Errorf("GiteaPATOverride = %q, want empty (admin still goes to KV)", cfg.GiteaPATOverride) } } diff --git a/src/runner-org-sync/internal/reconcile/reconcile.go b/src/runner-org-sync/internal/reconcile/reconcile.go index 8f84d596d48..a67e0405d31 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile.go +++ b/src/runner-org-sync/internal/reconcile/reconcile.go @@ -16,6 +16,8 @@ import ( "strings" "altinn.studio/runner-org-sync/internal/cdn" + "altinn.studio/runner-org-sync/internal/gitea" + "altinn.studio/runner-org-sync/internal/k8sstate" corev1 "k8s.io/api/core/v1" ) @@ -182,6 +184,13 @@ func (r *Reconciler) Run(ctx context.Context) (Report, error) { } token, err := r.minter.MintRegistrationToken(ctx, org.Code) if err != nil { + // Auth failures hit every subsequent org with the same PAT — + // fail fast instead of cascading the same root cause across + // the whole desired set. K8s records the CronJob failure and + // the next tick retries with whatever the latest PAT in KV is. + if errors.Is(err, gitea.ErrUnauthorized) { + return report, fmt.Errorf("reconcile: mint token for %s: %w", org.Code, err) + } report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org.Code, Stage: StageMint, Err: err}) continue } @@ -199,7 +208,7 @@ func (r *Reconciler) Run(ctx context.Context) (Report, error) { desiredSet[o.Code] = struct{}{} } for _, sec := range existing { - org := sec.Labels["runner-org-sync.altinn.studio/org"] + org := k8sstate.OrgFromSecret(sec) if org == "" { // Defence in depth: a managed Secret missing the org label is a // drift signal; skip rather than delete on uncertain attribution. 
diff --git a/src/runner-org-sync/internal/reconcile/reconcile_test.go b/src/runner-org-sync/internal/reconcile/reconcile_test.go index a5ef8e98176..158d85fa29a 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile_test.go +++ b/src/runner-org-sync/internal/reconcile/reconcile_test.go @@ -3,11 +3,13 @@ package reconcile import ( "context" "errors" + "fmt" "sort" "strings" "testing" "altinn.studio/runner-org-sync/internal/cdn" + "altinn.studio/runner-org-sync/internal/gitea" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -347,6 +349,44 @@ func TestRun_GiteaPartialFailure(t *testing.T) { // --- additional coverage ---------------------------------------------------- +// Auth failures hit every org with the same PAT — failing fast avoids a +// cascade of identical errors and keeps failure attribution clean. +func TestRun_FatalOnUnauthorizedMint(t *testing.T) { + // Reconciler sorts orgs alphabetically; "aaa" gets minted first. + // If the first org returns ErrUnauthorized we should bail fatally + // before attempting "zzz". + src := &stubSource{orgs: []cdn.Org{ + {Code: "aaa", Environments: []string{"tt02"}}, + {Code: "zzz", Environments: []string{"tt02"}}, + }} + minter := &stubMinter{ + errs: map[string]error{ + "aaa": fmt.Errorf("minting aaa: %w", gitea.ErrUnauthorized), + }, + } + store := newStubStore() + + r, _ := New(Options{ + Source: src, + Minter: minter, + Store: store, + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: []string{"aaa", "zzz"}, + }) + _, err := r.Run(context.Background()) + if err == nil { + t.Fatal("expected fatal error on ErrUnauthorized, got nil") + } + if !errors.Is(err, gitea.ErrUnauthorized) { + t.Errorf("expected wrapped ErrUnauthorized, got %v", err) + } + // zzz should NOT have been attempted — fail-fast short-circuits. 
+ if containsString(minter.calls, "zzz") { + t.Errorf("zzz should not be minted after aaa's 401; got calls %v", minter.calls) + } +} + func TestRun_FatalOnSourceError(t *testing.T) { r, _ := New(Options{ Source: &stubSource{err: errors.New("cdn down")}, From ff9e6bda482843d8e4547da0c0654a7561cb536d Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 22:07:12 +0200 Subject: [PATCH 24/33] remove unused duration metrics --- src/runner-org-sync/internal/telemetry/metrics.go | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/runner-org-sync/internal/telemetry/metrics.go b/src/runner-org-sync/internal/telemetry/metrics.go index 2d30e17b4f3..318299cc16f 100644 --- a/src/runner-org-sync/internal/telemetry/metrics.go +++ b/src/runner-org-sync/internal/telemetry/metrics.go @@ -21,9 +21,6 @@ type Metrics struct { SecretsDeleted metric.Int64Counter SecretsSkipped metric.Int64Counter OrgReconcileErrors metric.Int64Counter - GiteaCallDuration metric.Float64Histogram - KeyVaultDuration metric.Float64Histogram - CDNCallDuration metric.Float64Histogram ConfigMapApplied metric.Int64Counter KedaSecretApplied metric.Int64Counter } @@ -86,15 +83,6 @@ func NewMetrics() (*Metrics, error) { if err := mc(&out.OrgReconcileErrors, "runner_org_sync.org.reconcile_errors", "Per-org reconcile failures by stage."); err != nil { return nil, err } - if err := mk(&out.GiteaCallDuration, "runner_org_sync.gitea.call.duration", "Gitea admin API call duration.", "s"); err != nil { - return nil, err - } - if err := mk(&out.KeyVaultDuration, "runner_org_sync.keyvault.call.duration", "Azure Key Vault secret fetch duration.", "s"); err != nil { - return nil, err - } - if err := mk(&out.CDNCallDuration, "runner_org_sync.cdn.call.duration", "CDN fetch duration for altinn-orgs.json.", "s"); err != nil { - return nil, err - } if err := mc(&out.ConfigMapApplied, "runner_org_sync.configmap.applied", "ConfigMap apply attempts by 
changed=true|false."); err != nil { return nil, err } From eb7a03a88209318b8742e3a148a08d937f8cc6c6 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Wed, 13 May 2026 22:17:48 +0200 Subject: [PATCH 25/33] trimspace fields and limit reader --- src/runner-org-sync/internal/cdn/cdn.go | 6 +++++- src/runner-org-sync/internal/config/config.go | 4 ++-- src/runner-org-sync/internal/gitea/gitea.go | 6 +++++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/runner-org-sync/internal/cdn/cdn.go b/src/runner-org-sync/internal/cdn/cdn.go index 7cc2b22c3f2..13c97f5ca43 100644 --- a/src/runner-org-sync/internal/cdn/cdn.go +++ b/src/runner-org-sync/internal/cdn/cdn.go @@ -20,6 +20,10 @@ const ( defaultTimeout = 30 * time.Second defaultUserAgent = "runner-org-sync" maxErrorBody = 512 + // maxSuccessBody caps the JSON-decode read so a pathological CDN + // response cannot exhaust pod memory. altinn-orgs.json is ~100 KB + // today; 10 MiB is generous and far below the pod's memory limit. + maxSuccessBody = 10 << 20 // 10 MiB ) // ErrUnexpectedStatus is returned when the CDN responds with non-2xx. 
@@ -91,7 +95,7 @@ func (c *Client) Fetch(ctx context.Context) ([]Org, error) { var doc struct { Orgs map[string]Org `json:"orgs"` } - if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { + if err := json.NewDecoder(io.LimitReader(resp.Body, maxSuccessBody)).Decode(&doc); err != nil { return nil, fmt.Errorf("cdn: decode body: %w", err) } diff --git a/src/runner-org-sync/internal/config/config.go b/src/runner-org-sync/internal/config/config.go index 2f4b6d5588a..c946a62a887 100644 --- a/src/runner-org-sync/internal/config/config.go +++ b/src/runner-org-sync/internal/config/config.go @@ -80,12 +80,12 @@ func LoadFrom(get Getter) (Config, error) { KeyVaultSecretName: strings.TrimSpace(get(EnvKeyVaultSecretName)), SyncAll: parseBool(get(EnvSyncAll)), WhitelistedOrgs: parseCSV(get(EnvWhitelistedOrgs)), - GiteaPATOverride: get(EnvGiteaPATOverride), + GiteaPATOverride: strings.TrimSpace(get(EnvGiteaPATOverride)), KedaPATKeyVaultSecretName: strings.TrimSpace(get(EnvKedaPATKeyVaultSecretName)), KedaPATSecretName: strings.TrimSpace(get(EnvKedaPATSecretName)), KedaPATSecretKey: strings.TrimSpace(get(EnvKedaPATSecretKey)), - KedaPATOverride: get(EnvKedaPATOverride), + KedaPATOverride: strings.TrimSpace(get(EnvKedaPATOverride)), } var errs []error diff --git a/src/runner-org-sync/internal/gitea/gitea.go b/src/runner-org-sync/internal/gitea/gitea.go index c51f211a3e8..b143e8a0ae3 100644 --- a/src/runner-org-sync/internal/gitea/gitea.go +++ b/src/runner-org-sync/internal/gitea/gitea.go @@ -23,6 +23,10 @@ const ( defaultTimeout = 15 * time.Second defaultUserAgent = "runner-org-sync" maxErrorBody = 512 + // maxSuccessBody caps the registration-token JSON decode. The real + // response is a few hundred bytes; 16 KiB is generous defense against + // a pathological Gitea reply. + maxSuccessBody = 16 << 10 // 16 KiB ) // Sentinel errors. 
Callers can errors.Is against these to drive reconcile @@ -118,7 +122,7 @@ func (c *Client) MintRegistrationToken(ctx context.Context, org string) (string, var payload struct { Token string `json:"token"` } - if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { + if err := json.NewDecoder(io.LimitReader(resp.Body, maxSuccessBody)).Decode(&payload); err != nil { return "", fmt.Errorf("gitea: decode response for %s: %w", org, err) } if payload.Token == "" { From 53d1879803e47c9612edec3898d8c2230f00f79c Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Mon, 18 May 2026 09:50:00 +0200 Subject: [PATCH 26/33] apply KEDA PAT secret on reconcile failure --- src/runner-org-sync/cmd/runner-org-sync/main.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go index 75795223672..52af9e539c2 100644 --- a/src/runner-org-sync/cmd/runner-org-sync/main.go +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -140,18 +140,19 @@ func run() error { "configmap_changed", report.ConfigMapChanged, ) + // Independent of the per-org reconcile — runs even when its outcome is + // "partial" or fatal because the KEDA Secret has its own lifecycle. + // Failure is non-fatal: logged + counted, but the CronJob exit code is + // still driven by the org reconcile result so fatal reconcile errors stay + // visible to Kubernetes. + applyKedaSecret(ctx, store, cfg, kedaPAT, metrics, logger) + if runErr != nil { span.RecordError(runErr) span.SetStatus(codes.Error, runErr.Error()) return runErr } - // Independent of the per-org reconcile — runs even when its outcome is - // "partial" because the KEDA Secret has its own lifecycle. Failure is - // non-fatal: logged + counted, but the CronJob still exits 0 so the - // next tick retries. 
- applyKedaSecret(ctx, store, cfg, kedaPAT, metrics, logger) - if report.Outcome == reconcile.OutcomePartial { // Continue-on-partial: still exit 0; metric + WARN log carries the signal. span.SetStatus(codes.Ok, "partial") From 781d11c3d2dd3d0bdd3c0bdd770ec18075df578c Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Mon, 18 May 2026 09:53:27 +0200 Subject: [PATCH 27/33] restore labels on unchanged k8s objects --- .../internal/k8sstate/k8sstate.go | 35 ++++++---- .../internal/k8sstate/k8sstate_test.go | 66 ++++++++++++++++++- 2 files changed, 87 insertions(+), 14 deletions(-) diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go index afa3028053e..5d111510389 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -116,7 +116,7 @@ func (s *Store) DeleteSecret(ctx context.Context, name string) error { // ApplyConfigMap creates or updates the named ConfigMap so its Data matches // the supplied value. Returns true if a write actually occurred (create or // update), false if the existing object already matched. Labels are -// preserved on update; the managed-by label is added if missing. +// preserved on update; managed labels are added or restored if missing. 
func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string]string) (bool, error) { desired := &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ @@ -141,15 +141,15 @@ func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string return false, fmt.Errorf("k8sstate: get configmap %s: %w", name, err) } - if maps.Equal(existing.Data, data) { - return false, nil - } - existing.Data = data if existing.Labels == nil { existing.Labels = map[string]string{} } - existing.Labels[LabelManagedBy] = ManagedBy - existing.Labels[LabelComponent] = ComponentRunnerCM + labelsChanged := ensureLabel(existing.Labels, LabelManagedBy, ManagedBy) + labelsChanged = ensureLabel(existing.Labels, LabelComponent, ComponentRunnerCM) || labelsChanged + if maps.Equal(existing.Data, data) && !labelsChanged { + return false, nil + } + existing.Data = data if _, err := s.client.CoreV1().ConfigMaps(s.namespace).Update(ctx, existing, metav1.UpdateOptions{}); err != nil { return false, fmt.Errorf("k8sstate: update configmap %s: %w", name, err) @@ -163,7 +163,7 @@ func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string // consumed by KEDA's TriggerAuthentication. // // Labels are applied on create (ManagedBy). On update, the managed-by label -// is added if missing; other existing labels are preserved. +// is added or restored if missing; other existing labels are preserved. 
func (s *Store) ApplyOpaqueSecret(ctx context.Context, name, key, value string) (bool, error) { if key == "" { return false, fmt.Errorf("k8sstate: ApplyOpaqueSecret %s: key is required", name) @@ -192,8 +192,13 @@ func (s *Store) ApplyOpaqueSecret(ctx context.Context, name, key, value string) return false, fmt.Errorf("k8sstate: get opaque secret %s: %w", name, err) } + if existing.Labels == nil { + existing.Labels = map[string]string{} + } + labelsChanged := ensureLabel(existing.Labels, LabelManagedBy, ManagedBy) + // Only writing the single key we manage; leave any other keys untouched. - if bytes.Equal(existing.Data[key], encoded) { + if bytes.Equal(existing.Data[key], encoded) && !labelsChanged { return false, nil } @@ -201,10 +206,6 @@ func (s *Store) ApplyOpaqueSecret(ctx context.Context, name, key, value string) existing.Data = map[string][]byte{} } existing.Data[key] = encoded - if existing.Labels == nil { - existing.Labels = map[string]string{} - } - existing.Labels[LabelManagedBy] = ManagedBy if _, err := s.client.CoreV1().Secrets(s.namespace).Update(ctx, existing, metav1.UpdateOptions{}); err != nil { return false, fmt.Errorf("k8sstate: update opaque secret %s: %w", name, err) @@ -218,3 +219,11 @@ func (s *Store) ApplyOpaqueSecret(ctx context.Context, name, key, value string) func OrgFromSecret(s corev1.Secret) string { return s.Labels[LabelOrg] } + +func ensureLabel(labels map[string]string, key, value string) bool { + if labels[key] == value { + return false + } + labels[key] = value + return true +} diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go index 53bd32bda40..dcfebb972a2 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -161,7 +161,10 @@ func TestApplyConfigMap_NoOpOnSameContent(t *testing.T) { c := fake.NewSimpleClientset(&corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: "cm", 
Namespace: testNamespace, - Labels: map[string]string{LabelManagedBy: ManagedBy}, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRunnerCM, + }, }, Data: map[string]string{"k": "v"}, }) @@ -176,6 +179,36 @@ func TestApplyConfigMap_NoOpOnSameContent(t *testing.T) { } } +func TestApplyConfigMap_UpdatesOnLabelDrift(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cm", + Namespace: testNamespace, + Labels: map[string]string{"custom": "keep"}, + }, + Data: map[string]string{"k": "v"}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "cm", map[string]string{"k": "v"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (label drift)") + } + got, _ := c.CoreV1().ConfigMaps(testNamespace).Get(context.Background(), "cm", metav1.GetOptions{}) + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not restored, got %v", got.Labels) + } + if got.Labels[LabelComponent] != ComponentRunnerCM { + t.Errorf("component label was not restored, got %v", got.Labels) + } + if got.Labels["custom"] != "keep" { + t.Errorf("custom label was not preserved, got %v", got.Labels) + } +} + func TestApplyConfigMap_UpdatesOnDifference(t *testing.T) { c := fake.NewSimpleClientset(&corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{Name: "cm", Namespace: testNamespace}, @@ -255,6 +288,37 @@ func TestApplyOpaqueSecret_NoOpOnSameValue(t *testing.T) { } } +func TestApplyOpaqueSecret_UpdatesOnLabelDrift(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "keda-gitea-pat", + Namespace: testNamespace, + Labels: map[string]string{"custom": "keep"}, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"token": []byte("pat-value")}, + }) + s := NewStore(c, testNamespace) + + changed, err := 
s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "pat-value") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (label drift)") + } + got, _ := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "keda-gitea-pat", metav1.GetOptions{}) + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not restored, got %v", got.Labels) + } + if got.Labels["custom"] != "keep" { + t.Errorf("custom label was not preserved, got %v", got.Labels) + } + if string(got.Data["token"]) != "pat-value" { + t.Errorf("token = %q, want pat-value", string(got.Data["token"])) + } +} + func TestApplyOpaqueSecret_UpdatesOnDifference(t *testing.T) { c := fake.NewSimpleClientset(&corev1.Secret{ ObjectMeta: metav1.ObjectMeta{Name: "keda-gitea-pat", Namespace: testNamespace}, From 4e32b8a172c3320702a2783a167bc5d0b74687e2 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Mon, 18 May 2026 09:56:35 +0200 Subject: [PATCH 28/33] validate runner secrets before projection --- .../internal/k8sstate/k8sstate.go | 34 ++++++++++ .../internal/k8sstate/k8sstate_test.go | 66 +++++++++++++++++++ .../internal/reconcile/reconcile.go | 52 ++++++++++----- .../internal/reconcile/reconcile_test.go | 54 +++++++++++---- 4 files changed, 177 insertions(+), 29 deletions(-) diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go index 5d111510389..23fff3328e2 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -35,6 +35,16 @@ const ( SecretTokenKey = "token" ) +// RegistrationSecretState describes whether a per-org runner registration +// Secret is safe for the ConfigMap to reference. 
+type RegistrationSecretState string + +const ( + RegistrationSecretMissing RegistrationSecretState = "missing" + RegistrationSecretValid RegistrationSecretState = "valid" + RegistrationSecretInvalid RegistrationSecretState = "invalid" +) + // Store is the package's only entry point for cluster I/O. type Store struct { client kubernetes.Interface @@ -78,6 +88,30 @@ func (s *Store) SecretExists(ctx context.Context, name string) (bool, error) { return false, fmt.Errorf("k8sstate: get secret %s: %w", name, err) } +// RegistrationSecretStatus reports whether the named Secret exists and has +// the ownership labels and token data expected for the given org. +func (s *Store) RegistrationSecretStatus(ctx context.Context, name, org string) (RegistrationSecretState, error) { + sec, err := s.client.CoreV1().Secrets(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return RegistrationSecretMissing, nil + } + if err != nil { + return "", fmt.Errorf("k8sstate: get registration secret %s: %w", name, err) + } + if sec.Type != "" && sec.Type != corev1.SecretTypeOpaque { + return RegistrationSecretInvalid, nil + } + if sec.Labels[LabelManagedBy] != ManagedBy || + sec.Labels[LabelComponent] != ComponentRegToken || + sec.Labels[LabelOrg] != org { + return RegistrationSecretInvalid, nil + } + if len(sec.Data[SecretTokenKey]) == 0 { + return RegistrationSecretInvalid, nil + } + return RegistrationSecretValid, nil +} + // CreateRegistrationSecret creates an Opaque Secret carrying the // registration token at key "token", labelled with ManagedBy / Component / // Org. Returns the underlying error verbatim so callers can use apierrors.IsAlreadyExists. 
diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go index dcfebb972a2..2a886ddb480 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -68,6 +68,72 @@ func TestSecretExists(t *testing.T) { } } +func TestRegistrationSecretStatus(t *testing.T) { + c := fake.NewSimpleClientset( + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid", + Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "ttd", + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: []byte("tok")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foreign", + Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: "someone-else"}, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: []byte("tok")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "empty-token", + Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "ttd", + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: nil}, + }, + ) + s := NewStore(c, testNamespace) + + tests := []struct { + name string + secretName string + org string + want RegistrationSecretState + }{ + {name: "valid", secretName: "valid", org: "ttd", want: RegistrationSecretValid}, + {name: "missing", secretName: "missing", org: "ttd", want: RegistrationSecretMissing}, + {name: "foreign same name", secretName: "foreign", org: "ttd", want: RegistrationSecretInvalid}, + {name: "wrong org", secretName: "valid", org: "brg", want: RegistrationSecretInvalid}, + {name: "empty token", secretName: "empty-token", org: "ttd", want: RegistrationSecretInvalid}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, 
err := s.RegistrationSecretStatus(context.Background(), tt.secretName, tt.org) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != tt.want { + t.Errorf("RegistrationSecretStatus() = %q, want %q", got, tt.want) + } + }) + } +} + func TestDeleteSecret_IdempotentOnMissing(t *testing.T) { c := fake.NewSimpleClientset() s := NewStore(c, testNamespace) diff --git a/src/runner-org-sync/internal/reconcile/reconcile.go b/src/runner-org-sync/internal/reconcile/reconcile.go index a67e0405d31..ea3ad8cc7b4 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile.go +++ b/src/runner-org-sync/internal/reconcile/reconcile.go @@ -13,12 +13,12 @@ import ( "errors" "fmt" "sort" - "strings" "altinn.studio/runner-org-sync/internal/cdn" "altinn.studio/runner-org-sync/internal/gitea" "altinn.studio/runner-org-sync/internal/k8sstate" corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/yaml" ) // Defaults used when the caller does not override. @@ -30,9 +30,10 @@ const ( // Failure stages, surfaced on Report.FailedOrgs[*].Stage. const ( - StageMint = "mint" - StageCreate = "create" - StageDelete = "delete" + StageValidate = "validate" + StageMint = "mint" + StageCreate = "create" + StageDelete = "delete" ) // OrgSource produces the discovered org population (typically the CDN client). @@ -50,7 +51,7 @@ type TokenMinter interface { // SecretStore is the cluster I/O surface the Reconciler needs. 
type SecretStore interface { ListManagedSecrets(ctx context.Context) ([]corev1.Secret, error) - SecretExists(ctx context.Context, name string) (bool, error) + RegistrationSecretStatus(ctx context.Context, name, org string) (k8sstate.RegistrationSecretState, error) CreateRegistrationSecret(ctx context.Context, name, org, token string) error DeleteSecret(ctx context.Context, name string) error ApplyConfigMap(ctx context.Context, name string, data map[string]string) (bool, error) @@ -171,16 +172,24 @@ func (r *Reconciler) Run(ctx context.Context) (Report, error) { orgHasSecret := make(map[string]bool, len(desired)) for _, org := range desired { name := r.secretNameFor(org.Code) - exists, err := r.store.SecretExists(ctx, name) + status, err := r.store.RegistrationSecretStatus(ctx, name, org.Code) if err != nil { - // SecretExists hitting a transient apiserver error is fatal for + // The lookup hitting a transient apiserver error is fatal for // this run — without this lookup we cannot decide mint-or-skip. - return report, fmt.Errorf("reconcile: check secret %s: %w", name, err) + return report, fmt.Errorf("reconcile: check registration secret %s: %w", name, err) } - if exists { + switch status { + case k8sstate.RegistrationSecretValid: report.SecretsSkipped = append(report.SecretsSkipped, org.Code) orgHasSecret[org.Code] = true continue + case k8sstate.RegistrationSecretInvalid: + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{ + Org: org.Code, + Stage: StageValidate, + Err: fmt.Errorf("registration secret %s exists but is not a valid runner token secret", name), + }) + continue } token, err := r.minter.MintRegistrationToken(ctx, org.Code) if err != nil { @@ -287,15 +296,26 @@ func (r *Reconciler) filter(orgs []cdn.Org, report *Report) []cdn.Org { // on the consumer side, so a runner-org-sync-supplied replicas field would // be ignored at best and misleading at worst. 
func renderRunners(orgs []string, secretNameFor func(org string) string) string { - if len(orgs) == 0 { - return "[]\n" - } - var b strings.Builder + runners := make([]runnerConfig, 0, len(orgs)) for _, org := range orgs { - fmt.Fprintf(&b, "- name: %s\n", org) - fmt.Fprintf(&b, " registrationTokenSecretName: %s\n", secretNameFor(org)) + runners = append(runners, runnerConfig{ + Name: org, + RegistrationTokenSecretName: secretNameFor(org), + }) + } + out, err := yaml.Marshal(runners) + if err != nil { + // The input is a simple slice of strings rendered into a static struct; + // yaml.Marshal should not fail. Keep the historical empty-list output + // if it ever does, so the chart does not reference stale runners. + return "[]\n" } - return b.String() + return string(out) +} + +type runnerConfig struct { + Name string `json:"name"` + RegistrationTokenSecretName string `json:"registrationTokenSecretName"` } func orgCodes(orgs []cdn.Org) []string { diff --git a/src/runner-org-sync/internal/reconcile/reconcile_test.go b/src/runner-org-sync/internal/reconcile/reconcile_test.go index 158d85fa29a..eb84e58f9fb 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile_test.go +++ b/src/runner-org-sync/internal/reconcile/reconcile_test.go @@ -10,6 +10,7 @@ import ( "altinn.studio/runner-org-sync/internal/cdn" "altinn.studio/runner-org-sync/internal/gitea" + "altinn.studio/runner-org-sync/internal/k8sstate" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -43,7 +44,7 @@ func (m *stubMinter) MintRegistrationToken(_ context.Context, org string) (strin type stubStore struct { managed []corev1.Secret - existsByName map[string]bool + statusByName map[string]k8sstate.RegistrationSecretState createErr map[string]error deleteErr map[string]error applyCMErr error @@ -58,7 +59,7 @@ type stubStore struct { func newStubStore() *stubStore { return &stubStore{ - existsByName: map[string]bool{}, + statusByName: 
map[string]k8sstate.RegistrationSecretState{}, createErr: map[string]error{}, deleteErr: map[string]error{}, createdOrgs: map[string]string{}, @@ -70,11 +71,14 @@ func (s *stubStore) ListManagedSecrets(_ context.Context) ([]corev1.Secret, erro return s.managed, s.listErr } -func (s *stubStore) SecretExists(_ context.Context, name string) (bool, error) { +func (s *stubStore) RegistrationSecretStatus(_ context.Context, name, _ string) (k8sstate.RegistrationSecretState, error) { if s.existsErr != nil { - return false, s.existsErr + return "", s.existsErr } - return s.existsByName[name], nil + if status, ok := s.statusByName[name]; ok { + return status, nil + } + return k8sstate.RegistrationSecretMissing, nil } func (s *stubStore) CreateRegistrationSecret(_ context.Context, name, org, _ string) error { @@ -83,7 +87,7 @@ func (s *stubStore) CreateRegistrationSecret(_ context.Context, name, org, _ str } s.createdSecrets = append(s.createdSecrets, name) s.createdOrgs[name] = org - s.existsByName[name] = true + s.statusByName[name] = k8sstate.RegistrationSecretValid return nil } @@ -190,8 +194,8 @@ func TestRun_IdempotentReRun(t *testing.T) { minter := &stubMinter{} store := newStubStore() // pre-populate existing state — secrets exist for both orgs and we own them. 
- store.existsByName["altinn-gitea-runner-ttd-secret"] = true - store.existsByName["altinn-gitea-runner-brg-secret"] = true + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.statusByName["altinn-gitea-runner-brg-secret"] = k8sstate.RegistrationSecretValid store.managed = []corev1.Secret{ managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), managedSecret("altinn-gitea-runner-brg-secret", "brg"), @@ -226,8 +230,8 @@ func TestRun_OrgAdded(t *testing.T) { }} minter := &stubMinter{} store := newStubStore() - store.existsByName["altinn-gitea-runner-ttd-secret"] = true - store.existsByName["altinn-gitea-runner-brg-secret"] = true + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.statusByName["altinn-gitea-runner-brg-secret"] = k8sstate.RegistrationSecretValid store.managed = []corev1.Secret{ managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), managedSecret("altinn-gitea-runner-brg-secret", "brg"), @@ -254,8 +258,8 @@ func TestRun_OrgRemoved(t *testing.T) { }} minter := &stubMinter{} store := newStubStore() - store.existsByName["altinn-gitea-runner-ttd-secret"] = true - store.existsByName["altinn-gitea-runner-brg-secret"] = true + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.statusByName["altinn-gitea-runner-brg-secret"] = k8sstate.RegistrationSecretValid store.managed = []corev1.Secret{ managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), managedSecret("altinn-gitea-runner-brg-secret", "brg"), @@ -347,6 +351,30 @@ func TestRun_GiteaPartialFailure(t *testing.T) { } } +func TestRun_InvalidExistingSecretIsNotProjected(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + }} + minter := &stubMinter{} + store := newStubStore() + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretInvalid + + rep := runReconciler(t, src, minter, store, 
[]string{"ttd"}, false) + + if rep.Outcome != OutcomePartial { + t.Errorf("outcome = %v, want partial", rep.Outcome) + } + if len(rep.FailedOrgs) != 1 || rep.FailedOrgs[0].Org != "ttd" || rep.FailedOrgs[0].Stage != StageValidate { + t.Errorf("FailedOrgs = %v, want [{ttd validate ...}]", rep.FailedOrgs) + } + if len(minter.calls) != 0 { + t.Errorf("minter should not be called when same-name invalid secret exists; got %v", minter.calls) + } + if got := store.appliedCMData[ConfigMapDataKey]; got != "[]\n" { + t.Errorf("ConfigMap body = %q, want empty runner list", got) + } +} + // --- additional coverage ---------------------------------------------------- // Auth failures hit every org with the same PAT — failing fast avoids a @@ -449,7 +477,7 @@ func TestRun_SyncAllSkipsWhitelist(t *testing.T) { func TestRun_UnlabelledManagedSecretIsSkippedOnDelete(t *testing.T) { src := &stubSource{orgs: []cdn.Org{{Code: "ttd", Environments: []string{"tt02"}}}} store := newStubStore() - store.existsByName["altinn-gitea-runner-ttd-secret"] = true + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid store.managed = []corev1.Secret{ managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), // drift: managed-by label but no org label From 0b69c8e552209609eabbef33e9935da5e925beae Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Mon, 18 May 2026 09:59:40 +0200 Subject: [PATCH 29/33] remove unused secret existence helper --- .../internal/k8sstate/k8sstate.go | 13 ----------- .../internal/k8sstate/k8sstate_test.go | 23 ++++--------------- 2 files changed, 4 insertions(+), 32 deletions(-) diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go index 23fff3328e2..9bff6f71276 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -75,19 +75,6 @@ func (s *Store) ListManagedSecrets(ctx 
context.Context) ([]corev1.Secret, error) return list.Items, nil } -// SecretExists reports whether a Secret with the given name exists in the -// store's namespace. A NotFound error is reported as ok=false, nil error. -func (s *Store) SecretExists(ctx context.Context, name string) (bool, error) { - _, err := s.client.CoreV1().Secrets(s.namespace).Get(ctx, name, metav1.GetOptions{}) - if err == nil { - return true, nil - } - if apierrors.IsNotFound(err) { - return false, nil - } - return false, fmt.Errorf("k8sstate: get secret %s: %w", name, err) -} - // RegistrationSecretStatus reports whether the named Secret exists and has // the ownership labels and token data expected for the given org. func (s *Store) RegistrationSecretStatus(ctx context.Context, name, org string) (RegistrationSecretState, error) { diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go index 2a886ddb480..80bbdd3104d 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -6,6 +6,7 @@ import ( "testing" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" ) @@ -52,22 +53,6 @@ func TestCreateRegistrationSecret_AlreadyExists(t *testing.T) { } } -func TestSecretExists(t *testing.T) { - c := fake.NewSimpleClientset(&corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{Name: "exists", Namespace: testNamespace}, - }) - s := NewStore(c, testNamespace) - - ok, err := s.SecretExists(context.Background(), "exists") - if err != nil || !ok { - t.Errorf("SecretExists(exists) = %v, %v; want true, nil", ok, err) - } - ok, err = s.SecretExists(context.Background(), "missing") - if err != nil || ok { - t.Errorf("SecretExists(missing) = %v, %v; want false, nil", ok, err) - } -} - func TestRegistrationSecretStatus(t *testing.T) { c := fake.NewSimpleClientset( 
&corev1.Secret{ @@ -150,9 +135,9 @@ func TestDeleteSecret_RemovesExisting(t *testing.T) { if err := s.DeleteSecret(context.Background(), "x"); err != nil { t.Fatalf("unexpected error: %v", err) } - ok, _ := s.SecretExists(context.Background(), "x") - if ok { - t.Errorf("secret still exists after delete") + _, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "x", metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + t.Errorf("get deleted secret error = %v, want NotFound", err) } } From 105e918758b106dcfc013a785d938e5b36ce26e0 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Tue, 19 May 2026 10:58:28 +0200 Subject: [PATCH 30/33] add .lscache in gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 72734b75844..50c493dce8f 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,7 @@ ipch/ *.opensdf *.sdf *.cachefile +*.lscache # Visual Studio profiler *.psess From bf0cc18f920148a034dfd109b944b6275905c048 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Thu, 21 May 2026 08:49:00 +0200 Subject: [PATCH 31/33] use config map in chart instead of static --- .github/workflows/deploy-gitea-runners.yaml | 2 +- .../templates/helm-release.yaml | 17 ++++----- charts/gitea-org-runner-config/values.yaml | 36 ------------------- .../internal/reconcile/reconcile.go | 20 ++++++----- .../internal/reconcile/reconcile_test.go | 3 +- 5 files changed, 21 insertions(+), 57 deletions(-) diff --git a/.github/workflows/deploy-gitea-runners.yaml b/.github/workflows/deploy-gitea-runners.yaml index a0569145312..1be4609b2d8 100644 --- a/.github/workflows/deploy-gitea-runners.yaml +++ b/.github/workflows/deploy-gitea-runners.yaml @@ -84,7 +84,7 @@ jobs: environment: ${{ matrix.environment }} config-chart-name: gitea-org-runner-config artifact-name: gitea-runners - helm-set-arguments: chartVersion=0.1.0+${{ 
needs.determine-tag.outputs.tag }},imageTag=${{ needs.determine-tag.outputs.tag }},releaseName=gitea-runners,environment=${{ matrix.environment }} + helm-set-arguments: chartVersion=0.1.0+${{ needs.determine-tag.outputs.tag }},imageTag=${{ needs.determine-tag.outputs.tag }},releaseName=gitea-runners trace-workflow: false trace-team-name: 'team-studio' secrets: diff --git a/charts/gitea-org-runner-config/templates/helm-release.yaml b/charts/gitea-org-runner-config/templates/helm-release.yaml index c614889ca13..ec4de160ee5 100644 --- a/charts/gitea-org-runner-config/templates/helm-release.yaml +++ b/charts/gitea-org-runner-config/templates/helm-release.yaml @@ -22,6 +22,12 @@ spec: kind: HelmRepository name: studio-charts namespace: default + valuesFrom: + # runner-org-sync writes this ConfigMap in studio-runners. The + # runners.yaml key contains root Helm values with the dynamic runners list. + - kind: ConfigMap + name: runner-org-list + valuesKey: runners.yaml values: image: tag: "{{ .Values.imageTag }}" @@ -68,17 +74,6 @@ spec: memory: 2Gi ephemeral-storage: 20Gi - # Slim per-org list. The workload chart pairs each entry's `name` with - # chart-level gitea/keda config to render a ScaledJob, and mounts - # `registrationTokenSecretName` via secretKeyRef on the runner pod. - # No per-runner environmentVariables block needed; no `replicas` - # (KEDA owns scaling via maxConcurrent*). 
- runners: - {{- range index .Values.runners .Values.environment }} - - name: {{ .name }} - registrationTokenSecretName: {{ .registrationTokenSecretName }} - {{- end }} - tolerations: - key: "purpose" operator: "Equal" diff --git a/charts/gitea-org-runner-config/values.yaml b/charts/gitea-org-runner-config/values.yaml index 109632e7ecb..94db1972cec 100644 --- a/charts/gitea-org-runner-config/values.yaml +++ b/charts/gitea-org-runner-config/values.yaml @@ -2,7 +2,6 @@ chartVersion: "" releaseName: "" imageTag: "" giteaInstanceUrl: "http://altinn-repositories-public.default.svc.cluster.local" -environment: "dev" runtimeClassName: "kata-vm-isolation" dockerInDocker: @@ -18,38 +17,3 @@ maxConcurrentDefault: 2 maxConcurrentOverrides: brg: 6 ssb: 5 - -# Environment-specific runner lists. Entries are intentionally slim: -# the workload chart synthesises all per-runner env vars (GITEA_INSTANCE_URL, -# GITEA_RUNNER_LABELS, registration-token secretKeyRef, etc.) from chart-level -# config plus the org name; no per-runner environmentVariables block is needed. -# `replicas` is also gone — KEDA's ScaledJob owns scaling via maxConcurrent*. 
-# -# dev/staging: only ttd -# prod: ttd, brg, dsb, ssb, ksdigi, pat, dibk, skm, sfvt -runners: - dev: - - name: ttd - registrationTokenSecretName: altinn-gitea-runner-ttd-secret - staging: - - name: ttd - registrationTokenSecretName: altinn-gitea-runner-ttd-secret - prod: - - name: ttd - registrationTokenSecretName: altinn-gitea-runner-ttd-secret - - name: brg - registrationTokenSecretName: altinn-gitea-runner-brg-secret - - name: dsb - registrationTokenSecretName: altinn-gitea-runner-dsb-secret - - name: ssb - registrationTokenSecretName: altinn-gitea-runner-ssb-secret - - name: ksdigi - registrationTokenSecretName: altinn-gitea-runner-ksdigi-secret - - name: pat - registrationTokenSecretName: altinn-gitea-runner-pat-secret - - name: dibk - registrationTokenSecretName: altinn-gitea-runner-dibk-secret - - name: skm - registrationTokenSecretName: altinn-gitea-runner-skm-secret - - name: sfvt - registrationTokenSecretName: altinn-gitea-runner-sfvt-secret diff --git a/src/runner-org-sync/internal/reconcile/reconcile.go b/src/runner-org-sync/internal/reconcile/reconcile.go index ea3ad8cc7b4..86dc2798def 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile.go +++ b/src/runner-org-sync/internal/reconcile/reconcile.go @@ -287,10 +287,10 @@ func (r *Reconciler) filter(orgs []cdn.Org, report *Report) []cdn.Org { return out } -// renderRunners emits the YAML list consumed by the gitea-org-runner-config -// HelmRelease via Flux valuesFrom (targetPath: runners). Determinism via -// sorted input is required so unchanged state produces unchanged output and -// ApplyConfigMap detects "no change" correctly. +// renderRunners emits Helm values consumed by the gitea-org-runner-config +// HelmRelease via Flux valuesFrom. Determinism via sorted input is required +// so unchanged state produces unchanged output and ApplyConfigMap detects +// "no change" correctly. 
// // Replica count is deliberately omitted: scaling is owned by KEDA ScaledJobs // on the consumer side, so a runner-org-sync-supplied replicas field would @@ -303,16 +303,20 @@ func renderRunners(orgs []string, secretNameFor func(org string) string) string RegistrationTokenSecretName: secretNameFor(org), }) } - out, err := yaml.Marshal(runners) + out, err := yaml.Marshal(runnerValues{Runners: runners}) if err != nil { // The input is a simple slice of strings rendered into a static struct; - // yaml.Marshal should not fail. Keep the historical empty-list output - // if it ever does, so the chart does not reference stale runners. - return "[]\n" + // yaml.Marshal should not fail. Keep an empty runner list if it ever + // does, so the chart does not reference stale runners. + return "runners: []\n" } return string(out) } +type runnerValues struct { + Runners []runnerConfig `json:"runners"` +} + type runnerConfig struct { Name string `json:"name"` RegistrationTokenSecretName string `json:"registrationTokenSecretName"` diff --git a/src/runner-org-sync/internal/reconcile/reconcile_test.go b/src/runner-org-sync/internal/reconcile/reconcile_test.go index eb84e58f9fb..f3192242be6 100644 --- a/src/runner-org-sync/internal/reconcile/reconcile_test.go +++ b/src/runner-org-sync/internal/reconcile/reconcile_test.go @@ -172,6 +172,7 @@ func TestRun_ColdStart(t *testing.T) { t.Errorf("ConfigMapChanged = false, want true on cold start") } wantBody := strings.Join([]string{ + "runners:", "- name: brg", " registrationTokenSecretName: altinn-gitea-runner-brg-secret", "- name: dsb", @@ -370,7 +371,7 @@ func TestRun_InvalidExistingSecretIsNotProjected(t *testing.T) { if len(minter.calls) != 0 { t.Errorf("minter should not be called when same-name invalid secret exists; got %v", minter.calls) } - if got := store.appliedCMData[ConfigMapDataKey]; got != "[]\n" { + if got := store.appliedCMData[ConfigMapDataKey]; got != "runners: []\n" { t.Errorf("ConfigMap body = %q, want empty runner 
list", got) } } From 22cd2a4483962e88a354c1228bbc2e5a594ebdad Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Thu, 21 May 2026 09:04:47 +0200 Subject: [PATCH 32/33] cleanup syncroot --- .github/workflows/deploy-runner-org-sync.yaml | 2 +- .../studio/syncroot/base/runner-org-sync.yaml | 20 +------------------ infra/studio/syncroot/dev/kustomization.yaml | 18 ----------------- infra/studio/syncroot/prod/kustomization.yaml | 18 ----------------- .../syncroot/staging/kustomization.yaml | 17 ---------------- src/runner-org-sync/README.md | 4 ++-- .../infra/kustomize/{ => base}/cronjob.yaml | 0 .../kustomize/{ => base}/kustomization.yaml | 0 .../kustomize/{ => base}/networkpolicy.yaml | 0 .../infra/kustomize/{ => base}/role.yaml | 0 .../kustomize/{ => base}/rolebinding.yaml | 0 .../kustomize/{ => base}/serviceaccount.yaml | 0 .../{ => base}/triggerauthentication.yaml | 0 .../kustomize/dev/cronjob-env.patch.yaml | 18 +++++++++++++++++ .../infra/kustomize/dev/kustomization.yaml | 7 +++++++ .../kustomize/prod/cronjob-env.patch.yaml | 18 +++++++++++++++++ .../infra/kustomize/prod/kustomization.yaml | 7 +++++++ .../kustomize/staging/cronjob-env.patch.yaml | 18 +++++++++++++++++ .../kustomize/staging/kustomization.yaml | 7 +++++++ 19 files changed, 79 insertions(+), 75 deletions(-) rename src/runner-org-sync/infra/kustomize/{ => base}/cronjob.yaml (100%) rename src/runner-org-sync/infra/kustomize/{ => base}/kustomization.yaml (100%) rename src/runner-org-sync/infra/kustomize/{ => base}/networkpolicy.yaml (100%) rename src/runner-org-sync/infra/kustomize/{ => base}/role.yaml (100%) rename src/runner-org-sync/infra/kustomize/{ => base}/rolebinding.yaml (100%) rename src/runner-org-sync/infra/kustomize/{ => base}/serviceaccount.yaml (100%) rename src/runner-org-sync/infra/kustomize/{ => base}/triggerauthentication.yaml (100%) create mode 100644 src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml create mode 100644 
src/runner-org-sync/infra/kustomize/dev/kustomization.yaml create mode 100644 src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml create mode 100644 src/runner-org-sync/infra/kustomize/prod/kustomization.yaml create mode 100644 src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml create mode 100644 src/runner-org-sync/infra/kustomize/staging/kustomization.yaml diff --git a/.github/workflows/deploy-runner-org-sync.yaml b/.github/workflows/deploy-runner-org-sync.yaml index 768a02f0cdd..3bb963cfd21 100644 --- a/.github/workflows/deploy-runner-org-sync.yaml +++ b/.github/workflows/deploy-runner-org-sync.yaml @@ -68,7 +68,7 @@ jobs: run: docker push ${{ steps.vars.outputs.image-repo }} - name: patch base with image tag - working-directory: src/runner-org-sync/infra/kustomize + working-directory: src/runner-org-sync/infra/kustomize/base run: | export IMAGE="${{ steps.vars.outputs.image-repo }}" export IMAGE_TAG="${{ steps.vars.outputs.short-sha }}" diff --git a/infra/studio/syncroot/base/runner-org-sync.yaml b/infra/studio/syncroot/base/runner-org-sync.yaml index e7b449ebfdf..d6f9d4c4d55 100644 --- a/infra/studio/syncroot/base/runner-org-sync.yaml +++ b/infra/studio/syncroot/base/runner-org-sync.yaml @@ -26,28 +26,10 @@ spec: kind: OCIRepository name: runner-org-sync namespace: default - path: ./ + path: ./${ENVIRONMENT} prune: true timeout: 1m postBuild: - # These variables are double-substituted: the outer syncroot Kustomization - # (provisioned in altinn-studio-infra/provisioning/studio-flux-syncroot.tf) - # resolves ${...} on the right-hand sides first, then this inner - # Kustomization applies the resolved values to the kustomize manifests - # pulled from the OCI artifact above. 
substitute: ENVIRONMENT: ${ENVIRONMENT} RUNNER_ORG_SYNC_ENTRA_CLIENT_ID: ${RUNNER_ORG_SYNC_ENTRA_CLIENT_ID} - # The kustomize manifests in src/runner-org-sync/infra/kustomize also - # reference these two; they must be sourced from somewhere before this - # Kustomization can reconcile cleanly. Three options: - # 1. Add to studio-flux-syncroot.tf's postBuild.substitute (alongside - # ENTRA_CLIENT_ID). Best for values Terraform already knows - # (KEYVAULT_NAME = azurerm_key_vault.kv.name). - # 2. Hardcode literal values here. Best for stable per-env values - # with no Terraform counterpart. - # 3. Move the runner-org-sync resource to per-env syncroot overlays - # (infra/studio/syncroot/{dev,staging,prod}/) so each env can - # patch its own values. - # RUNNER_ORG_SYNC_KEYVAULT_NAME: ??? - # RUNNER_ORG_SYNC_ORGS: ??? diff --git a/infra/studio/syncroot/dev/kustomization.yaml b/infra/studio/syncroot/dev/kustomization.yaml index c1b3ee6e286..27bb5cb0b48 100644 --- a/infra/studio/syncroot/dev/kustomization.yaml +++ b/infra/studio/syncroot/dev/kustomization.yaml @@ -2,21 +2,3 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - ../base - -# Per-env substitutions for the runner-org-sync Flux Kustomization. KV name -# follows the cluster-wide convention altinn-studio--kv. ORGS lists the -# whitelist runner-org-sync uses to filter the CDN. Add an entry here when -# onboarding a new dev-tier org. 
-patches: - - target: - group: kustomize.toolkit.fluxcd.io - version: v1 - kind: Kustomization - name: runner-org-sync - patch: |- - - op: add - path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_KEYVAULT_NAME - value: altinn-studio-dev-kv - - op: add - path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_ORGS - value: ttd diff --git a/infra/studio/syncroot/prod/kustomization.yaml b/infra/studio/syncroot/prod/kustomization.yaml index cb1df0d95a9..222e156a829 100644 --- a/infra/studio/syncroot/prod/kustomization.yaml +++ b/infra/studio/syncroot/prod/kustomization.yaml @@ -17,21 +17,3 @@ patches: metadata: name: altinn-altinity-agents namespace: default - - # Per-env substitutions for the runner-org-sync Flux Kustomization. KV - # name follows the cluster-wide convention altinn-studio--kv. ORGS - # lists the whitelist runner-org-sync uses to filter the CDN; this list - # must agree with the prod entry of runners in - # charts/gitea-org-runner-config/values.yaml. - - target: - group: kustomize.toolkit.fluxcd.io - version: v1 - kind: Kustomization - name: runner-org-sync - patch: |- - - op: add - path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_KEYVAULT_NAME - value: altinn-studio-prod-kv - - op: add - path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_ORGS - value: ttd,brg,dsb,ssb,ksdigi,pat,dibk,skm,sfvt diff --git a/infra/studio/syncroot/staging/kustomization.yaml b/infra/studio/syncroot/staging/kustomization.yaml index 3edc82bf974..222e156a829 100644 --- a/infra/studio/syncroot/staging/kustomization.yaml +++ b/infra/studio/syncroot/staging/kustomization.yaml @@ -17,20 +17,3 @@ patches: metadata: name: altinn-altinity-agents namespace: default - - # Per-env substitutions for the runner-org-sync Flux Kustomization. KV - # name follows the cluster-wide convention altinn-studio--kv. ORGS - # lists the whitelist runner-org-sync uses to filter the CDN. Add an - # entry here when onboarding a new staging-tier org. 
- - target: - group: kustomize.toolkit.fluxcd.io - version: v1 - kind: Kustomization - name: runner-org-sync - patch: |- - - op: add - path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_KEYVAULT_NAME - value: altinn-studio-staging-kv - - op: add - path: /spec/postBuild/substitute/RUNNER_ORG_SYNC_ORGS - value: ttd diff --git a/src/runner-org-sync/README.md b/src/runner-org-sync/README.md index f8112ca51ce..c243d3dd06b 100644 --- a/src/runner-org-sync/README.md +++ b/src/runner-org-sync/README.md @@ -7,7 +7,7 @@ runners running in the Studio cluster. ## What it does Each scheduled run (cadence configured by `spec.schedule` in -`infra/kustomize/cronjob.yaml`): +`infra/kustomize/base/cronjob.yaml`): 1. Loads the **admin** Gitea PAT from Azure Key Vault (via Workload Identity), or from a local env var override for development. @@ -96,7 +96,7 @@ Three distinct credentials, three storage strategies: ### KEDA wiring The `TriggerAuthentication/keda-gitea-auth` lives in -`infra/kustomize/triggerauthentication.yaml` — ships with this service so +`infra/kustomize/base/triggerauthentication.yaml` — ships with this service so the Secret writer and the auth ref are deployed atomically. 
Three names must agree across this folder and the workload chart: diff --git a/src/runner-org-sync/infra/kustomize/cronjob.yaml b/src/runner-org-sync/infra/kustomize/base/cronjob.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/cronjob.yaml rename to src/runner-org-sync/infra/kustomize/base/cronjob.yaml diff --git a/src/runner-org-sync/infra/kustomize/kustomization.yaml b/src/runner-org-sync/infra/kustomize/base/kustomization.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/kustomization.yaml rename to src/runner-org-sync/infra/kustomize/base/kustomization.yaml diff --git a/src/runner-org-sync/infra/kustomize/networkpolicy.yaml b/src/runner-org-sync/infra/kustomize/base/networkpolicy.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/networkpolicy.yaml rename to src/runner-org-sync/infra/kustomize/base/networkpolicy.yaml diff --git a/src/runner-org-sync/infra/kustomize/role.yaml b/src/runner-org-sync/infra/kustomize/base/role.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/role.yaml rename to src/runner-org-sync/infra/kustomize/base/role.yaml diff --git a/src/runner-org-sync/infra/kustomize/rolebinding.yaml b/src/runner-org-sync/infra/kustomize/base/rolebinding.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/rolebinding.yaml rename to src/runner-org-sync/infra/kustomize/base/rolebinding.yaml diff --git a/src/runner-org-sync/infra/kustomize/serviceaccount.yaml b/src/runner-org-sync/infra/kustomize/base/serviceaccount.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/serviceaccount.yaml rename to src/runner-org-sync/infra/kustomize/base/serviceaccount.yaml diff --git a/src/runner-org-sync/infra/kustomize/triggerauthentication.yaml b/src/runner-org-sync/infra/kustomize/base/triggerauthentication.yaml similarity index 100% rename from src/runner-org-sync/infra/kustomize/triggerauthentication.yaml rename to 
src/runner-org-sync/infra/kustomize/base/triggerauthentication.yaml diff --git a/src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml b/src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml new file mode 100644 index 00000000000..cef86f098b3 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: runner-org-sync + env: + - name: RUNNER_ORG_SYNC_ORGS + value: "ttd" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "altinn-studio-dev-kv" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=dev" diff --git a/src/runner-org-sync/infra/kustomize/dev/kustomization.yaml b/src/runner-org-sync/infra/kustomize/dev/kustomization.yaml new file mode 100644 index 00000000000..bb34589ad59 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/dev/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../base + +patches: + - path: cronjob-env.patch.yaml diff --git a/src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml b/src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml new file mode 100644 index 00000000000..06f01b2d23c --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: runner-org-sync + env: + - name: RUNNER_ORG_SYNC_ORGS + value: "ttd,brg,dsb,ssb,ksdigi,pat,dibk,skm,sfvt" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "altinn-studio-prod-kv" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=prod" diff --git a/src/runner-org-sync/infra/kustomize/prod/kustomization.yaml 
b/src/runner-org-sync/infra/kustomize/prod/kustomization.yaml new file mode 100644 index 00000000000..bb34589ad59 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/prod/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../base + +patches: + - path: cronjob-env.patch.yaml diff --git a/src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml b/src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml new file mode 100644 index 00000000000..b94cc3aec06 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: runner-org-sync + env: + - name: RUNNER_ORG_SYNC_ORGS + value: "ttd" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "altinn-studio-staging-kv" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=staging" diff --git a/src/runner-org-sync/infra/kustomize/staging/kustomization.yaml b/src/runner-org-sync/infra/kustomize/staging/kustomization.yaml new file mode 100644 index 00000000000..bb34589ad59 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/staging/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../base + +patches: + - path: cronjob-env.patch.yaml From d56032c9c84ae5278c08a449a86f8d06d9aaa7a9 Mon Sep 17 00:00:00 2001 From: Mirko Sekulic <23359247+mirkoSekulic@users.noreply.github.com> Date: Thu, 21 May 2026 09:55:29 +0200 Subject: [PATCH 33/33] support legacy secrets --- .../internal/k8sstate/k8sstate.go | 28 ++++++++++++++--- .../internal/k8sstate/k8sstate_test.go | 30 +++++++++++++++++++ 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go index 
9bff6f71276..a37869a56b3 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -25,10 +25,12 @@ const ( LabelManagedBy = "app.kubernetes.io/managed-by" LabelComponent = "app.kubernetes.io/component" LabelOrg = "runner-org-sync.altinn.studio/org" + LabelFluxWatch = "reconcile.fluxcd.io/watch" ManagedBy = "runner-org-sync" ComponentRegToken = "runner-registration-token" ComponentRunnerCM = "runner-org-list" + FluxWatchEnabled = "Enabled" // SecretTokenKey is the data key inside per-org registration Secrets, // matching what the runner Deployment's secretKeyRef expects. @@ -88,14 +90,25 @@ func (s *Store) RegistrationSecretStatus(ctx context.Context, name, org string) if sec.Type != "" && sec.Type != corev1.SecretTypeOpaque { return RegistrationSecretInvalid, nil } - if sec.Labels[LabelManagedBy] != ManagedBy || - sec.Labels[LabelComponent] != ComponentRegToken || - sec.Labels[LabelOrg] != org { + if len(sec.Data[SecretTokenKey]) == 0 { return RegistrationSecretInvalid, nil } - if len(sec.Data[SecretTokenKey]) == 0 { + if hasConflictingLabel(sec.Labels, LabelManagedBy, ManagedBy) || + hasConflictingLabel(sec.Labels, LabelComponent, ComponentRegToken) || + hasConflictingLabel(sec.Labels, LabelOrg, org) { return RegistrationSecretInvalid, nil } + if sec.Labels == nil { + sec.Labels = map[string]string{} + } + labelsChanged := ensureLabel(sec.Labels, LabelManagedBy, ManagedBy) + labelsChanged = ensureLabel(sec.Labels, LabelComponent, ComponentRegToken) || labelsChanged + labelsChanged = ensureLabel(sec.Labels, LabelOrg, org) || labelsChanged + if labelsChanged { + if _, err := s.client.CoreV1().Secrets(s.namespace).Update(ctx, sec, metav1.UpdateOptions{}); err != nil { + return "", fmt.Errorf("k8sstate: adopt registration secret %s: %w", name, err) + } + } return RegistrationSecretValid, nil } @@ -146,6 +159,7 @@ func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string Labels: 
map[string]string{ LabelManagedBy: ManagedBy, LabelComponent: ComponentRunnerCM, + LabelFluxWatch: FluxWatchEnabled, }, }, Data: data, @@ -167,6 +181,7 @@ func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string } labelsChanged := ensureLabel(existing.Labels, LabelManagedBy, ManagedBy) labelsChanged = ensureLabel(existing.Labels, LabelComponent, ComponentRunnerCM) || labelsChanged + labelsChanged = ensureLabel(existing.Labels, LabelFluxWatch, FluxWatchEnabled) || labelsChanged if maps.Equal(existing.Data, data) && !labelsChanged { return false, nil } @@ -248,3 +263,8 @@ func ensureLabel(labels map[string]string, key, value string) bool { labels[key] = value return true } + +func hasConflictingLabel(labels map[string]string, key, expected string) bool { + value, ok := labels[key] + return ok && value != expected +} diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go index 80bbdd3104d..fd2565af435 100644 --- a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -77,6 +77,14 @@ func TestRegistrationSecretStatus(t *testing.T) { Type: corev1.SecretTypeOpaque, Data: map[string][]byte{SecretTokenKey: []byte("tok")}, }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "legacy-unlabeled", + Namespace: testNamespace, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: []byte("tok")}, + }, &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: "empty-token", @@ -102,6 +110,7 @@ func TestRegistrationSecretStatus(t *testing.T) { {name: "valid", secretName: "valid", org: "ttd", want: RegistrationSecretValid}, {name: "missing", secretName: "missing", org: "ttd", want: RegistrationSecretMissing}, {name: "foreign same name", secretName: "foreign", org: "ttd", want: RegistrationSecretInvalid}, + {name: "legacy unlabeled", secretName: "legacy-unlabeled", org: "ttd", want: 
RegistrationSecretValid}, {name: "wrong org", secretName: "valid", org: "brg", want: RegistrationSecretInvalid}, {name: "empty token", secretName: "empty-token", org: "ttd", want: RegistrationSecretInvalid}, } @@ -117,6 +126,20 @@ func TestRegistrationSecretStatus(t *testing.T) { } }) } + + got, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "legacy-unlabeled", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get adopted legacy secret: %v", err) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("legacy managed-by label = %q, want %q", got.Labels[LabelManagedBy], ManagedBy) + } + if got.Labels[LabelComponent] != ComponentRegToken { + t.Errorf("legacy component label = %q, want %q", got.Labels[LabelComponent], ComponentRegToken) + } + if got.Labels[LabelOrg] != "ttd" { + t.Errorf("legacy org label = %q, want ttd", got.Labels[LabelOrg]) + } } func TestDeleteSecret_IdempotentOnMissing(t *testing.T) { @@ -206,6 +229,9 @@ func TestApplyConfigMap_CreatesWhenMissing(t *testing.T) { if cm.Labels[LabelManagedBy] != ManagedBy { t.Errorf("managed-by label missing, got %v", cm.Labels) } + if cm.Labels[LabelFluxWatch] != FluxWatchEnabled { + t.Errorf("flux watch label missing, got %v", cm.Labels) + } } func TestApplyConfigMap_NoOpOnSameContent(t *testing.T) { @@ -215,6 +241,7 @@ func TestApplyConfigMap_NoOpOnSameContent(t *testing.T) { Labels: map[string]string{ LabelManagedBy: ManagedBy, LabelComponent: ComponentRunnerCM, + LabelFluxWatch: FluxWatchEnabled, }, }, Data: map[string]string{"k": "v"}, @@ -255,6 +282,9 @@ func TestApplyConfigMap_UpdatesOnLabelDrift(t *testing.T) { if got.Labels[LabelComponent] != ComponentRunnerCM { t.Errorf("component label was not restored, got %v", got.Labels) } + if got.Labels[LabelFluxWatch] != FluxWatchEnabled { + t.Errorf("flux watch label was not restored, got %v", got.Labels) + } if got.Labels["custom"] != "keep" { t.Errorf("custom label was not preserved, got %v", got.Labels) }