diff --git a/.github/workflows/deploy-gitea-runners.yaml b/.github/workflows/deploy-gitea-runners.yaml index a0569145312..1be4609b2d8 100644 --- a/.github/workflows/deploy-gitea-runners.yaml +++ b/.github/workflows/deploy-gitea-runners.yaml @@ -84,7 +84,7 @@ jobs: environment: ${{ matrix.environment }} config-chart-name: gitea-org-runner-config artifact-name: gitea-runners - helm-set-arguments: chartVersion=0.1.0+${{ needs.determine-tag.outputs.tag }},imageTag=${{ needs.determine-tag.outputs.tag }},releaseName=gitea-runners,environment=${{ matrix.environment }} + helm-set-arguments: chartVersion=0.1.0+${{ needs.determine-tag.outputs.tag }},imageTag=${{ needs.determine-tag.outputs.tag }},releaseName=gitea-runners trace-workflow: false trace-team-name: 'team-studio' secrets: diff --git a/.github/workflows/deploy-runner-org-sync.yaml b/.github/workflows/deploy-runner-org-sync.yaml new file mode 100644 index 00000000000..3bb963cfd21 --- /dev/null +++ b/.github/workflows/deploy-runner-org-sync.yaml @@ -0,0 +1,115 @@ +name: Deploy Runner Org Sync +on: + push: + branches: [main, feat/runners-autoscaling] + paths: + - 'src/runner-org-sync/**' + - '.github/workflows/deploy-runner-org-sync.yaml' + workflow_dispatch: + inputs: + environments: + description: 'Environments to deploy to. Multiple environments can be specified by separating them with a comma.' + required: false + default: 'dev' + +permissions: + id-token: write + contents: read + +jobs: + construct-environments-array: + uses: ./.github/workflows/template-studio-construct-environments.yaml + with: + # Push to the autoscaling feature branch deploys to staging only. + # Push to main and manual dispatches fall through to github.event.inputs + # (which is empty on push to main → template applies its own defaults, + # and on dispatch → the value the user typed in the form). 
+ inputs: ${{ (github.event_name == 'push' && github.ref == 'refs/heads/feat/runners-autoscaling') && '{"environments":"staging"}' || toJSON(github.event.inputs) }} + + push-artifact: + name: Push runner-org-sync as OCI artifact + runs-on: ubuntu-latest + environment: dev + env: + REGISTRY_NAME: altinntjenestercontainerregistry + outputs: + CONFIG_REPO: ${{ steps.vars.outputs.config-repo }} + defaults: + run: + working-directory: src/runner-org-sync + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - name: Set vars + id: vars + run: | + SHA="${GITHUB_SHA::10}" + echo "short-sha=$SHA" >> "$GITHUB_OUTPUT" + echo "image-repo=altinntjenestercontainerregistry.azurecr.io/altinn-studio/runner-org-sync:${SHA}" >> "$GITHUB_OUTPUT" + echo "config-repo=altinntjenestercontainerregistry.azurecr.io/altinn-studio/configs/runner-org-sync-repo:${SHA}" >> "$GITHUB_OUTPUT" + + - name: az login + uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID_FC }} + tenant-id: ${{ secrets.AZURE_TENANT_ID_FC }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID_FC }} + + - name: az acr login + run: az acr login --name ${{ env.REGISTRY_NAME }} + + - name: flux install + uses: fluxcd/flux2/action@bfa461ed2153ae5e0cca6bce08e0845268fb3088 # v2.8.2 + + - name: docker build + run: docker build -t ${{ steps.vars.outputs.image-repo }} -f Dockerfile . 
+ + - name: push image + run: docker push ${{ steps.vars.outputs.image-repo }} + + - name: patch base with image tag + working-directory: src/runner-org-sync/infra/kustomize/base + run: | + export IMAGE="${{ steps.vars.outputs.image-repo }}" + export IMAGE_TAG="${{ steps.vars.outputs.short-sha }}" + yq -i '.metadata.annotations["altinn.studio/image"] = env(IMAGE)' cronjob.yaml + yq -i '.metadata.annotations["altinn.studio/image-tag"] = env(IMAGE_TAG)' cronjob.yaml + + - name: push artifact + working-directory: src/runner-org-sync/infra/kustomize + run: | + flux push artifact oci://${{ steps.vars.outputs.config-repo }} \ + --provider=azure \ + --reproducible \ + --path="." \ + --source="$(git config --get remote.origin.url)" \ + --revision="$(git branch --show-current)/$(git rev-parse HEAD)" + + tag: + name: Tag artifact + needs: [push-artifact, construct-environments-array] + runs-on: ubuntu-latest + environment: ${{ matrix.environment }} + env: + REGISTRY_NAME: altinntjenestercontainerregistry + strategy: + matrix: + include: ${{ fromJSON(needs.construct-environments-array.outputs.result) }} + steps: + - name: az login + uses: azure/login@a457da9ea143d694b1b9c7c869ebb04ebe844ef5 # v2 + with: + client-id: ${{ secrets.AZURE_CLIENT_ID_FC }} + tenant-id: ${{ secrets.AZURE_TENANT_ID_FC }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID_FC }} + + - name: az acr login + run: az acr login --name ${{ env.REGISTRY_NAME }} + + - name: flux install + uses: fluxcd/flux2/action@bfa461ed2153ae5e0cca6bce08e0845268fb3088 # v2.8.2 + + - name: tag artifact + run: | + flux tag artifact oci://${{ needs.push-artifact.outputs.CONFIG_REPO }} \ + --tag ${{ matrix.environment }} diff --git a/.gitignore b/.gitignore index 72734b75844..50c493dce8f 100644 --- a/.gitignore +++ b/.gitignore @@ -81,6 +81,7 @@ ipch/ *.opensdf *.sdf *.cachefile +*.lscache # Visual Studio profiler *.psess diff --git a/charts/gitea-org-runner-config/templates/helm-release.yaml 
b/charts/gitea-org-runner-config/templates/helm-release.yaml index 72991df60e5..ec4de160ee5 100644 --- a/charts/gitea-org-runner-config/templates/helm-release.yaml +++ b/charts/gitea-org-runner-config/templates/helm-release.yaml @@ -22,6 +22,12 @@ spec: kind: HelmRepository name: studio-charts namespace: default + valuesFrom: + # runner-org-sync writes this ConfigMap in studio-runners. The + # runners.yaml key contains root Helm values with the dynamic runners list. + - kind: ConfigMap + name: runner-org-list + valuesKey: runners.yaml values: image: tag: "{{ .Values.imageTag }}" @@ -32,6 +38,32 @@ spec: enabled: {{ .Values.dockerInDocker.enabled }} shareWorkspace: enabled: {{ .Values.shareWorkspace.enabled }} + + # Chart-level Gitea config: the workload chart synthesises each runner + # pod's GITEA_INSTANCE_URL / RUNNER_LABELS env vars from here. The KEDA + # github-runner scaler uses apiUrl to poll org queue depth. + gitea: + instanceUrl: "{{ .Values.giteaInstanceUrl }}" + apiUrl: "{{ .Values.giteaInstanceUrl }}/api/v1" + # The runner advertises `ubuntu-latest:host` to act_runner (`:host` + # selects run-on-host mode); the KEDA scaler filters by `ubuntu-latest` + # only, matching what workflows put in `runs-on:`. + runnerLabels: "ubuntu-latest:host" + scalerLabels: "ubuntu-latest" + + # KEDA wiring: name of the TriggerAuthentication ScaledJobs reference. + # The CR is materialised by runner-org-sync's deploy in the same + # namespace (src/runner-org-sync/infra/kustomize/triggerauthentication.yaml). + keda: + authenticationRef: + name: keda-gitea-auth + + # Per-org concurrency caps. Default applies to every org; overrides + # keyed by org name take precedence. 
+ maxConcurrentDefault: {{ .Values.maxConcurrentDefault | default 2 }} + maxConcurrentOverrides: + {{- toYaml (.Values.maxConcurrentOverrides | default dict) | nindent 6 }} + resources: requests: cpu: 500m @@ -41,24 +73,7 @@ spec: cpu: "2" memory: 2Gi ephemeral-storage: 20Gi - runners: - {{- $giteaUrl := .Values.giteaInstanceUrl }} - {{- range index .Values.runners .Values.environment }} - - name: {{ .name }} - replicas: {{ .replicas }} - environmentVariables: - - name: GITEA_INSTANCE_URL - value: "{{ $giteaUrl }}" - - name: GITEA_RUNNER_EPHEMERAL - value: "1" - - name: GITEA_RUNNER_LABELS - value: "ubuntu-latest:host" - - name: GITEA_RUNNER_REGISTRATION_TOKEN - valueFrom: - secretKeyRef: - name: {{ .registrationTokenSecretName }} - key: token - {{- end }} + tolerations: - key: "purpose" operator: "Equal" diff --git a/charts/gitea-org-runner-config/values.yaml b/charts/gitea-org-runner-config/values.yaml index ab5765bf009..94db1972cec 100644 --- a/charts/gitea-org-runner-config/values.yaml +++ b/charts/gitea-org-runner-config/values.yaml @@ -2,7 +2,6 @@ chartVersion: "" releaseName: "" imageTag: "" giteaInstanceUrl: "http://altinn-repositories-public.default.svc.cluster.local" -environment: "dev" runtimeClassName: "kata-vm-isolation" dockerInDocker: @@ -10,43 +9,11 @@ dockerInDocker: shareWorkspace: enabled: true -# Environment-specific runner configurations -# dev/staging: only ttd -# prod: ttd, brg, dsb, ssb, ksdigi, pat, dibk, skm -runners: - dev: - - name: ttd - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-ttd-secret - staging: - - name: ttd - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-ttd-secret - prod: - - name: ttd - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-ttd-secret - - name: brg - replicas: 6 - registrationTokenSecretName: altinn-gitea-runner-brg-secret - - name: dsb - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-dsb-secret - - name: ssb - replicas: 5 - 
registrationTokenSecretName: altinn-gitea-runner-ssb-secret - - name: ksdigi - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-ksdigi-secret - - name: pat - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-pat-secret - - name: dibk - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-dibk-secret - - name: skm - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-skm-secret - - name: sfvt - replicas: 1 - registrationTokenSecretName: altinn-gitea-runner-sfvt-secret +# Per-org concurrency caps applied to KEDA ScaledJobs in the workload chart. +# `maxConcurrentDefault` applies to every org; `maxConcurrentOverrides` keys +# named exceptions per org. Overrides for orgs absent from the active +# environment's runners list are harmlessly ignored. +maxConcurrentDefault: 2 +maxConcurrentOverrides: + brg: 6 + ssb: 5 diff --git a/charts/gitea-org-runner/templates/deployment.yaml b/charts/gitea-org-runner/templates/deployment.yaml deleted file mode 100644 index d7523e93dbc..00000000000 --- a/charts/gitea-org-runner/templates/deployment.yaml +++ /dev/null @@ -1,157 +0,0 @@ -{{- range .Values.runners }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "gitea-org-runner.fullname" $ }}-{{ .name }} - labels: - {{- include "gitea-org-runner.labels" $ | nindent 4 }} - runner: {{ .name }} -spec: - replicas: {{ .replicas | default 1 }} - strategy: - type: RollingUpdate - rollingUpdate: - maxSurge: 1 - maxUnavailable: 0 - selector: - matchLabels: - {{- include "gitea-org-runner.selectorLabels" $ | nindent 6 }} - runner: {{ .name }} - template: - metadata: - {{- with $.Values.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "gitea-org-runner.labels" $ | nindent 8 }} - runner: {{ .name }} - {{- with $.Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with $.Values.runtimeClassName }} - runtimeClassName: {{ . 
}} - {{- end }} - {{- with $.Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $.Values.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if $.Values.dockerInDocker.enabled }} - initContainers: - - name: dockerd - image: "{{ $.Values.dockerInDocker.image.repository }}:{{ $.Values.dockerInDocker.image.tag }}" - imagePullPolicy: {{ $.Values.dockerInDocker.image.pullPolicy }} - restartPolicy: Always - securityContext: - privileged: true - env: - - name: DOCKER_TLS_CERTDIR - value: "/certs" - command: ["sh", "-c"] - args: - - | - if [ "$(df -PT /var/lib/docker | awk 'NR==2 {print $2}')" = "virtiofs" ]; then - apk add --no-cache e2fsprogs && - truncate -s {{ $.Values.dockerInDocker.diskSize | default "15G" }} /tmp/docker-disk.img && - mkfs.ext4 -F /tmp/docker-disk.img && - mount /tmp/docker-disk.img /var/lib/docker - fi - exec dockerd-entrypoint.sh --host=tcp://0.0.0.0:2376 - startupProbe: - tcpSocket: - port: 2376 - periodSeconds: 2 - failureThreshold: 45 - volumeMounts: - - name: docker-certs - mountPath: /certs - {{- if $.Values.shareWorkspace.enabled }} - - name: act-cache - mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} - {{- end }} - {{- end }} - containers: - - name: {{ $.Chart.Name }} - {{- with $.Values.securityContext }} - securityContext: - {{- toYaml . 
| nindent 12 }} - {{- end }} - image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" - imagePullPolicy: {{ $.Values.image.pullPolicy }} - env: - {{- if $.Values.dockerInDocker.enabled }} - - name: DOCKER_HOST - value: "tcp://localhost:2376" - - name: DOCKER_TLS_VERIFY - value: "1" - - name: DOCKER_CERT_PATH - value: "/certs/client" - {{- end }} - {{- if .environmentVariables }} - {{- range $variable := .environmentVariables}} - - name: {{ $variable.name }} - {{- if $variable.value }} - value: {{ $variable.value | quote }} - {{- end }} - {{- if $variable.valueFrom }} - valueFrom: - secretKeyRef: - name: {{ $variable.valueFrom.secretKeyRef.name }} - key: {{ $variable.valueFrom.secretKeyRef.key }} - {{- end }} - {{- end }} - {{- end }} - - {{- with $.Values.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- if or $.Values.volumeMounts $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} - volumeMounts: - {{- with $.Values.volumeMounts }} - {{- toYaml . | nindent 12 }} - {{- end }} - {{- if $.Values.dockerInDocker.enabled }} - - name: docker-certs - mountPath: /certs - readOnly: true - {{- end }} - {{- if $.Values.shareWorkspace.enabled }} - - name: act-cache - mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} - {{- end }} - {{- end }} - {{- if or $.Values.volumes $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} - volumes: - {{- with $.Values.volumes }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if $.Values.dockerInDocker.enabled }} - - name: docker-certs - emptyDir: {} - {{- end }} - {{- if $.Values.shareWorkspace.enabled }} - - name: act-cache - emptyDir: - sizeLimit: {{ $.Values.shareWorkspace.sizeLimit | default "10Gi" }} - {{- end }} - {{- end }} - {{- with $.Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with $.Values.affinity }} - affinity: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- with $.Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/charts/gitea-org-runner/templates/scaledjob.yaml b/charts/gitea-org-runner/templates/scaledjob.yaml new file mode 100644 index 00000000000..efb26b671ae --- /dev/null +++ b/charts/gitea-org-runner/templates/scaledjob.yaml @@ -0,0 +1,181 @@ +{{- range .Values.runners }} +--- +apiVersion: keda.sh/v1alpha1 +kind: ScaledJob +metadata: + name: {{ include "gitea-org-runner.fullname" $ }}-{{ .name }} + labels: + {{- include "gitea-org-runner.labels" $ | nindent 4 }} + runner: {{ .name }} +spec: + # Per-org concurrency cap: take the override if defined, otherwise the chart-wide default. + # KEDA creates at most this many concurrent Jobs from the template below. + maxReplicaCount: {{ index $.Values.maxConcurrentOverrides .name | default $.Values.maxConcurrentDefault }} + minReplicaCount: 0 + pollingInterval: {{ $.Values.keda.pollingInterval | default 30 }} + successfulJobsHistoryLimit: {{ $.Values.keda.successfulJobsHistoryLimit | default 5 }} + failedJobsHistoryLimit: {{ $.Values.keda.failedJobsHistoryLimit | default 5 }} + jobTargetRef: + backoffLimit: 0 + ttlSecondsAfterFinished: {{ $.Values.keda.ttlSecondsAfterFinished | default 300 }} + # Cap on total Job runtime. Without this, a runner pod that registers + # but never gets dispatched a workflow polls Gitea forever (act_runner + # has no built-in idle timeout). Set generously enough that legitimate + # long workflows can finish, but short enough that orphaned idle + # runners get reaped. + activeDeadlineSeconds: {{ $.Values.keda.activeDeadlineSeconds | default 1800 }} + template: + metadata: + labels: + {{- include "gitea-org-runner.labels" $ | nindent 10 }} + runner: {{ .name }} + {{- with $.Values.podLabels }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.podAnnotations }} + annotations: + {{- toYaml . 
| nindent 10 }} + {{- end }} + spec: + {{- with $.Values.runtimeClassName }} + runtimeClassName: {{ . }} + {{- end }} + restartPolicy: Never + {{- with $.Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- if $.Values.dockerInDocker.enabled }} + initContainers: + - name: dockerd + image: "{{ $.Values.dockerInDocker.image.repository }}:{{ $.Values.dockerInDocker.image.tag }}" + imagePullPolicy: {{ $.Values.dockerInDocker.image.pullPolicy }} + restartPolicy: Always + securityContext: + privileged: true + env: + - name: DOCKER_TLS_CERTDIR + value: "/certs" + command: ["sh", "-c"] + args: + - | + if [ "$(df -PT /var/lib/docker | awk 'NR==2 {print $2}')" = "virtiofs" ]; then + apk add --no-cache e2fsprogs && + truncate -s {{ $.Values.dockerInDocker.diskSize | default "15G" }} /tmp/docker-disk.img && + mkfs.ext4 -F /tmp/docker-disk.img && + mount /tmp/docker-disk.img /var/lib/docker + fi + exec dockerd-entrypoint.sh --host=tcp://0.0.0.0:2376 + startupProbe: + tcpSocket: + port: 2376 + periodSeconds: 2 + failureThreshold: 45 + volumeMounts: + - name: docker-certs + mountPath: /certs + {{- if $.Values.shareWorkspace.enabled }} + - name: act-cache + mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} + {{- end }} + {{- end }} + containers: + - name: {{ $.Chart.Name }} + {{- with $.Values.securityContext }} + securityContext: + {{- toYaml . | nindent 14 }} + {{- end }} + image: "{{ $.Values.image.repository }}:{{ $.Values.image.tag | default $.Chart.AppVersion }}" + imagePullPolicy: {{ $.Values.image.pullPolicy }} + env: + {{- if $.Values.dockerInDocker.enabled }} + - name: DOCKER_HOST + value: "tcp://localhost:2376" + - name: DOCKER_TLS_VERIFY + value: "1" + - name: DOCKER_CERT_PATH + value: "/certs/client" + {{- end }} + # Runner registration + behaviour. 
Constructed from chart-level config + # (giteaInstanceUrl, runnerLabels) and the per-org Secret produced by + # runner-org-sync — no per-runner environmentVariables block required. + - name: GITEA_INSTANCE_URL + value: {{ $.Values.gitea.instanceUrl | quote }} + - name: GITEA_RUNNER_EPHEMERAL + value: "1" + - name: GITEA_RUNNER_LABELS + value: {{ $.Values.gitea.runnerLabels | quote }} + - name: GITEA_RUNNER_REGISTRATION_TOKEN + valueFrom: + secretKeyRef: + name: {{ .registrationTokenSecretName }} + key: token + {{- with $.Values.extraEnv }} + {{- toYaml . | nindent 14 }} + {{- end }} + {{- with $.Values.resources }} + resources: + {{- toYaml . | nindent 14 }} + {{- end }} + {{- if or $.Values.volumeMounts $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} + volumeMounts: + {{- with $.Values.volumeMounts }} + {{- toYaml . | nindent 14 }} + {{- end }} + {{- if $.Values.dockerInDocker.enabled }} + - name: docker-certs + mountPath: /certs + readOnly: true + {{- end }} + {{- if $.Values.shareWorkspace.enabled }} + - name: act-cache + mountPath: {{ $.Values.shareWorkspace.path | default "/root/.cache/act" }} + {{- end }} + {{- end }} + {{- if or $.Values.volumes $.Values.dockerInDocker.enabled $.Values.shareWorkspace.enabled }} + volumes: + {{- with $.Values.volumes }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- if $.Values.dockerInDocker.enabled }} + - name: docker-certs + emptyDir: {} + {{- end }} + {{- if $.Values.shareWorkspace.enabled }} + - name: act-cache + emptyDir: + sizeLimit: {{ $.Values.shareWorkspace.sizeLimit | default "10Gi" }} + {{- end }} + {{- end }} + {{- with $.Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.affinity }} + affinity: + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 10 }} + {{- end }} + triggers: + - type: github-runner + metadata: + # github-runner scaler is GitHub-compatible; pointed at Gitea's API URL + # it polls the org's Actions queue depth via the same shape. + # NOTE: the metadata key is `githubApiURL` — lowercase 'g', capital + # 'A' in 'Api', all-caps 'URL'. Any other casing is silently ignored + # and the scaler falls back to https://api.github.com. + githubApiURL: {{ $.Values.gitea.apiUrl | quote }} + owner: {{ .name | quote }} + runnerScope: "org" + labels: {{ $.Values.gitea.scalerLabels | default "ubuntu-latest" | quote }} + targetWorkflowQueueLength: {{ $.Values.keda.targetWorkflowQueueLength | default "1" | quote }} + authenticationRef: + name: {{ $.Values.keda.authenticationRef.name }} +{{- end }} diff --git a/charts/gitea-org-runner/values.yaml b/charts/gitea-org-runner/values.yaml index 6f178abbdda..4abcbb74a53 100644 --- a/charts/gitea-org-runner/values.yaml +++ b/charts/gitea-org-runner/values.yaml @@ -56,32 +56,71 @@ shareWorkspace: path: "/root/.cache/act" sizeLimit: "10Gi" -# List of runners to deploy. Each runner will create a separate deployment. +# Gitea connection details. Used both for the runner registration env vars +# inside each Job pod, and for the KEDA scaler's queue-depth polling URL. +gitea: + # Base URL of the Gitea instance (no /api/v1 suffix). Injected as + # GITEA_INSTANCE_URL on the runner container so act_runner registers here. + instanceUrl: "http://altinn-repositories-public.default.svc.cluster.local" + # Full API URL including /api/v1. Consumed by the KEDA github-runner scaler + # which polls the Actions queue. Gitea's Actions API is GitHub-compatible. + apiUrl: "http://altinn-repositories-public.default.svc.cluster.local/api/v1" + # Labels advertised by the runner via GITEA_RUNNER_LABELS env var. 
The + # `:host` suffix is act_runner's runner-type hint (run jobs natively in + # the pod instead of in docker); only the part BEFORE the colon is the + # label name workflows match against with `runs-on:`. + runnerLabels: "ubuntu-latest:host" + # The label name (without runner-type suffix) the KEDA scaler filters + # workflow runs by. Must match the value workflows put in `runs-on:`. + # Derived from runnerLabels by stripping the ":host" suffix; exposed + # separately so a workflow expecting "ubuntu-latest" isn't filtered out + # by a scaler looking for the literal "ubuntu-latest:host". + scalerLabels: "ubuntu-latest" + +# KEDA / ScaledJob configuration. Per-org caps live below in maxConcurrent*. +keda: + # Reference to a TriggerAuthentication CRD in the same namespace as the + # ScaledJob, holding the read-only Gitea PAT the scaler uses to poll the + # queue. Created out-of-band (one per cluster), not by this chart. + authenticationRef: + name: keda-gitea-auth + # How often KEDA polls Gitea per ScaledJob. 30s is the KEDA default; raise + # if Gitea complains about request rate (1 req per org per pollingInterval). + pollingInterval: 30 + # Kubernetes deletes a finished Job this many seconds after it completes. + ttlSecondsAfterFinished: 300 + # Hard cap on total Job runtime. Pod is killed if it exceeds this even + # while polling for work. Set to a value larger than the longest legitimate + # workflow you expect. Default 1800 = 30 min; raise if workflows can run + # longer, lower if you want orphaned idle runners reaped faster. + activeDeadlineSeconds: 1800 + # KEDA keeps the last N successful / failed Job records visible. + successfulJobsHistoryLimit: 5 + failedJobsHistoryLimit: 5 + # How many queued workflows each runner is expected to drain. With "1", + # KEDA wants one new runner per queued workflow (subject to maxReplicaCount). + targetWorkflowQueueLength: "1" + +# Per-org concurrency caps. 
The chart applies maxConcurrentDefault to every +# org unless overridden by an explicit entry in maxConcurrentOverrides. +# maxConcurrentOverrides is keyed by org code (matching runners[].name). +maxConcurrentDefault: 2 +maxConcurrentOverrides: {} + # brg: 6 + # ssb: 5 + +# List of orgs to render a ScaledJob for. Populated by the consumer (typically +# the gitea-org-runner-config wrapper chart via Flux valuesFrom from the +# runner-org-list ConfigMap that runner-org-sync writes). +# +# Shape: [{ name, registrationTokenSecretName }]. The chart constructs all +# runner env vars (GITEA_INSTANCE_URL, GITEA_RUNNER_LABELS, etc.) from +# chart-level config; no per-runner environmentVariables block is needed. runners: [] # - name: ttd - # replicas: 5 - # environmentVariables: - # - name: GITEA_INSTANCE_URL - # value: "https://gitea.example.com" - # - name: GITEA_RUNNER_REGISTRATION_TOKEN - # valueFrom: - # secretKeyRef: - # name: gitea-runner-ttd-token - # key: token - # - name: GITEA_RUNNER_NAME - # value: "ttd-runner" + # registrationTokenSecretName: altinn-gitea-runner-ttd-secret # - name: brg - # replicas: 3 - # environmentVariables: - # - name: GITEA_INSTANCE_URL - # value: "https://gitea.example.com" - # - name: GITEA_RUNNER_REGISTRATION_TOKEN - # valueFrom: - # secretKeyRef: - # name: gitea-runner-brg-token - # key: token - # - name: GITEA_RUNNER_NAME - # value: "brg-runner" + # registrationTokenSecretName: altinn-gitea-runner-brg-secret # This is for setting Kubernetes Annotations to a Pod. # For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ @@ -95,7 +134,6 @@ podSecurityContext: # securityContext: # privileged: false - # capabilities: # drop: # - ALL @@ -103,12 +141,7 @@ podSecurityContext: # runAsNonRoot: true # runAsUser: 1000 - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. 
This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. # limits: # cpu: 100m # memory: 128Mi @@ -116,16 +149,11 @@ resources: {} # cpu: 100m # memory: 128Mi -# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ -autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 100 - targetCPUUtilizationPercentage: 80 - # targetMemoryUtilizationPercentage: 80 +# Optional extra env vars merged onto the runner container. +extraEnv: [] + # - name: SOMETHING + # value: "..." nodeSelector: {} - tolerations: [] - affinity: {} diff --git a/infra/studio/syncroot/base/keda.yaml b/infra/studio/syncroot/base/keda.yaml new file mode 100644 index 00000000000..9feba499df3 --- /dev/null +++ b/infra/studio/syncroot/base/keda.yaml @@ -0,0 +1,62 @@ +--- +# HelmRepository + HelmRelease live in `default` (the outer syncroot's +# targetNamespace) because the keda namespace doesn't exist yet — the +# chart's install creates it via install.createNamespace below. The chart +# still installs INTO `keda` via spec.targetNamespace; only the Flux source +# and release plumbing live in default. +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: keda + namespace: default +spec: + # KEDA's canonical Helm repository (the URL their docs link to). The OCI + # variant at oci://ghcr.io/kedacore/charts returns 403 on anonymous pulls, + # so we use HTTPS — same chart contents, no auth quirks. 
+ interval: 1h + url: https://kedacore.github.io/charts +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: keda + namespace: default +spec: + interval: 10m + timeout: 5m + releaseName: keda + targetNamespace: keda + install: + # Helm creates the keda namespace at install time so the operator + # resources (Deployments, Services, ConfigMaps...) have somewhere to land. + createNamespace: true + remediation: + retries: 5 + upgrade: + remediation: + retries: 5 + chart: + spec: + chart: keda + version: 2.19.0 + sourceRef: + kind: HelmRepository + name: keda + namespace: default + values: + # KEDA's defaults are sensible. Keep this block intentionally small; + # any per-environment knobs (e.g. higher resource limits in prod) can + # be added later via Flux postBuild substitution or per-env overlays. + resources: + operator: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 1Gi + metricServer: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 1Gi diff --git a/infra/studio/syncroot/base/kustomization.yaml b/infra/studio/syncroot/base/kustomization.yaml index 9241bdd628f..ad4c5a1221c 100644 --- a/infra/studio/syncroot/base/kustomization.yaml +++ b/infra/studio/syncroot/base/kustomization.yaml @@ -11,3 +11,5 @@ resources: - lhci.yaml - observability.yaml - otel-operator.yaml + - keda.yaml + - runner-org-sync.yaml diff --git a/infra/studio/syncroot/base/runner-org-sync.yaml b/infra/studio/syncroot/base/runner-org-sync.yaml new file mode 100644 index 00000000000..d6f9d4c4d55 --- /dev/null +++ b/infra/studio/syncroot/base/runner-org-sync.yaml @@ -0,0 +1,35 @@ +apiVersion: source.toolkit.fluxcd.io/v1 +kind: OCIRepository +metadata: + name: runner-org-sync + namespace: default +spec: + interval: 5m + # Single artifact, multi-env tagged. Mirrors the lhci pattern (vs. Designer + # which uses per-env artifact names). 
The deploy-runner-org-sync workflow + # in .github/workflows pushes one artifact per build SHA, then tags it + # with the environment name in the `tag` matrix job. + url: oci://altinntjenestercontainerregistry.azurecr.io/altinn-studio/configs/runner-org-sync-repo + ref: + tag: ${ENVIRONMENT} + provider: azure +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: runner-org-sync + namespace: default +spec: + interval: 5m + targetNamespace: studio-runners + sourceRef: + kind: OCIRepository + name: runner-org-sync + namespace: default + path: ./${ENVIRONMENT} + prune: true + timeout: 1m + postBuild: + substitute: + ENVIRONMENT: ${ENVIRONMENT} + RUNNER_ORG_SYNC_ENTRA_CLIENT_ID: ${RUNNER_ORG_SYNC_ENTRA_CLIENT_ID} diff --git a/src/runner-org-sync/.dockerignore b/src/runner-org-sync/.dockerignore new file mode 100644 index 00000000000..dcfabf0e3b1 --- /dev/null +++ b/src/runner-org-sync/.dockerignore @@ -0,0 +1,15 @@ +bin/ +dist/ +coverage.out +coverage.html +*.test +.git/ +.gitignore +.golangci.yml +Makefile +README.md +infra/ +test/ +**/testdata/ +**/*_test.go +.DS_Store diff --git a/src/runner-org-sync/.gitignore b/src/runner-org-sync/.gitignore new file mode 100644 index 00000000000..e874f071658 --- /dev/null +++ b/src/runner-org-sync/.gitignore @@ -0,0 +1,6 @@ +bin/ +dist/ +coverage.out +coverage.html +*.test +.DS_Store diff --git a/src/runner-org-sync/.golangci.yml b/src/runner-org-sync/.golangci.yml new file mode 100644 index 00000000000..7ce3d3492db --- /dev/null +++ b/src/runner-org-sync/.golangci.yml @@ -0,0 +1,40 @@ +run: + timeout: 5m + tests: true + +linters: + default: none + enable: + - errcheck + - errorlint + - gocritic + - gosec + - govet + - ineffassign + - revive + - staticcheck + - unused + +linters-settings: + errorlint: + errorf: true + asserts: true + comparison: true + revive: + severity: warning + rules: + - name: exported + - name: var-naming + - name: error-return + - name: error-naming + - name: error-strings 
+ - name: unused-parameter + - name: receiver-naming + - name: indent-error-flow + - name: package-comments + +issues: + exclude-rules: + - path: _test\.go + linters: + - gosec diff --git a/src/runner-org-sync/Dockerfile b/src/runner-org-sync/Dockerfile new file mode 100644 index 00000000000..eb50ed9771a --- /dev/null +++ b/src/runner-org-sync/Dockerfile @@ -0,0 +1,36 @@ +# syntax=docker/dockerfile:1.7 + +ARG GO_VERSION=1.26 + +# --- build ------------------------------------------------------------------ +FROM golang:${GO_VERSION}-alpine AS build + +WORKDIR /src + +# Module cache: download deps first so source changes don't re-download. +COPY go.mod go.sum ./ +RUN --mount=type=cache,target=/go/pkg/mod \ + go mod download + +COPY . . + +# Static binary so we can run on distroless/static without a libc. +# -trimpath strips local paths from stack traces, -s -w trims symbol tables. +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + CGO_ENABLED=0 GOOS=linux go build \ + -trimpath \ + -ldflags="-s -w" \ + -o /out/runner-org-sync \ + ./cmd/runner-org-sync + +# --- runtime ---------------------------------------------------------------- +# Distroless static (nonroot variant): no shell, no package manager, runs as +# uid 65532. ca-certificates are bundled, so TLS to Azure KV and the CDN +# works without extra setup. +FROM gcr.io/distroless/static-debian12:nonroot + +COPY --from=build /out/runner-org-sync /runner-org-sync + +USER 65532:65532 +ENTRYPOINT ["/runner-org-sync"] diff --git a/src/runner-org-sync/Makefile b/src/runner-org-sync/Makefile new file mode 100644 index 00000000000..33c7d71b52f --- /dev/null +++ b/src/runner-org-sync/Makefile @@ -0,0 +1,33 @@ +SHELL := /bin/bash +BINARY := bin/runner-org-sync +PKG := ./... 
+ +.PHONY: build +build: + @mkdir -p bin + go build -trimpath -ldflags="-s -w" -o $(BINARY) ./cmd/runner-org-sync + +.PHONY: test +test: + go test -race -count=1 $(PKG) + +.PHONY: test-cover +test-cover: + go test -race -count=1 -coverprofile=coverage.out $(PKG) + go tool cover -html=coverage.out -o coverage.html + +.PHONY: lint +lint: + golangci-lint run + +.PHONY: tidy +tidy: + go mod tidy + +.PHONY: vet +vet: + go vet $(PKG) + +.PHONY: clean +clean: + rm -rf bin coverage.out coverage.html diff --git a/src/runner-org-sync/README.md b/src/runner-org-sync/README.md new file mode 100644 index 00000000000..c243d3dd06b --- /dev/null +++ b/src/runner-org-sync/README.md @@ -0,0 +1,246 @@ +# runner-org-sync + +A small, idempotent Kubernetes CronJob that bridges the Altinn organisation +list (published on the public CDN) to the per-organisation Gitea Actions +runners running in the Studio cluster. + +## What it does + +Each scheduled run (cadence configured by `spec.schedule` in +`infra/kustomize/base/cronjob.yaml`): + +1. Loads the **admin** Gitea PAT from Azure Key Vault (via Workload Identity), + or from a local env var override for development. +2. Loads the **read-only** Gitea PAT from the same Key Vault (different + secret name). This is the PAT KEDA's `github-runner` scaler will use. +3. Fetches `altinn-orgs.json` from `https://altinncdn.no/orgs/altinn-orgs.json`. +4. Filters orgs to those with at least one declared `environments` entry, + then intersects with a whitelist supplied via env var. +5. For each org in the desired set: + - if a `Secret altinn-gitea-runner-{org}-secret` already exists, leaves it + alone — registered tokens are preserved across reconciles, + - otherwise mints a fresh registration token via Gitea's admin API and + creates the Secret. +6. Deletes Secrets for orgs that are no longer in the desired set. +7. 
Writes a single `ConfigMap/runner-org-list` projecting the desired set; + the `gitea-org-runner-config` HelmRelease picks this up via Flux + `valuesFrom` and renders one KEDA `ScaledJob` per entry. +8. Projects the read-only PAT into an Opaque `Secret/keda-gitea-pat` (or + the name configured via env). KEDA's `TriggerAuthentication` references + this Secret. The Secret is created on first run; on subsequent runs it + is updated only when the KV value has changed. + +Continue-on-partial-failure: a single org failing to mint does not abort the +run. Failed orgs are simply omitted from this tick's ConfigMap and retried +on the next; failures surface through metrics (`runner_org_sync.org.reconcile_errors`) +rather than CronJob exit codes. + +## Architecture + +``` + Azure Key Vault + │ + │ Workload Identity + ▼ + altinncdn.no ──► runner-org-sync (CronJob) + altinn-orgs.json │ + │ filter: environments != ∅ ∧ whitelist + ▼ + ┌─────────────────────────────────────────────────────┐ + │ studio-runners ns │ + │ │ + │ per-org Secrets ConfigMap KEDA Secret │ + │ ┌──────────────┐ ┌─────────────────┐ ┌─────────┐ │ + │ │ ttd-secret │ │ runner-org-list │ │ keda- │ │ + │ │ brg-secret │ │ - ttd │ │ gitea- │ │ + │ │ dsb-secret │ │ - brg │ │ pat │ │ + │ │ ... 
│ │ - dsb │ │ │ │ + │ └──────┬───────┘ └────────┬────────┘ └────┬────┘ │ + │ │ │ │ │ + └──────────┼───────────────────┼───────────────┼──────┘ + │ │ valuesFrom │ + │ ▼ │ + │ ┌───────────────────────────┐ │ secret + │ │ gitea-org-runner-config │ │ TargetRef + │ │ HelmRelease (Flux) │ │ + │ │ renders one ScaledJob │ │ + │ │ per org-in-ConfigMap │ │ + │ └────────────┬──────────────┘ │ + │ │ ▼ + │ ▼ ┌──────────────────┐ + │ ┌──────────────┐ │ TriggerAuth │ + │ │ ScaledJob │◄─┤ keda-gitea-auth │ + │ │ (per org) │ └──────────────────┘ + │ └──────┬───────┘ + │ secretKeyRef │ KEDA creates Jobs on demand + ▼ ▼ + ┌──────────────────────────────────────────────────────┐ + │ Jobs (one per workflow; pod registers, runs, │ + │ exits; GC'd after ttlSecondsAfterFinished) │ + └──────────────────────────────────────────────────────┘ + │ + ▼ + OTel collector + (traces + metrics + logs at + otel-router.observability:4317) +``` + +Three distinct credentials, three storage strategies: + +| Credential | Sensitivity | Storage | +| ------------------------------------------ | ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Gitea admin PAT (mints tokens for any org) | High | Azure Key Vault, fetched at pod start via Workload Identity. Never persisted in K8s. | +| Per-org runner registration token | Lower (scoped to one org) | K8s Secret `altinn-gitea-runner-{org}-secret`, key `token`. Minted by runner-org-sync on first appearance of the org, consumed by the runner Pod (created by KEDA's ScaledJob) via `secretKeyRef`. | +| Read-only Gitea PAT for KEDA scaler | Lower (read-only on orgs) | Azure Key Vault → projected to K8s Secret `keda-gitea-pat`, key `token`, by runner-org-sync each tick. Consumed by KEDA's `TriggerAuthentication`. Rotates when the KV value changes (≤ tick + 30s). 
| + +### KEDA wiring + +The `TriggerAuthentication/keda-gitea-auth` lives in +`infra/kustomize/base/triggerauthentication.yaml` — ships with this service so +the Secret writer and the auth ref are deployed atomically. Three names +must agree across this folder and the workload chart: + +| Where | Field | Value | +| ------------------------------------- | ------------------------------------------------------ | -------------------------- | +| `cronjob.yaml` (env) | `RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME` / `_SECRET_KEY` | `keda-gitea-pat` / `token` | +| `triggerauthentication.yaml` | `secretTargetRef.name` / `.key` | `keda-gitea-pat` / `token` | +| `charts/gitea-org-runner/values.yaml` | `keda.authenticationRef.name` | `keda-gitea-auth` | + +The chart only consumes the TriggerAuth name as a reference; it does not +define the Secret or the TriggerAuth itself. Renaming any of the above +requires updating every place it is referenced in the same change — otherwise KEDA scalers +fail with `auth ref not found`. + +## Configuration + +All settings come from environment variables. The loader fails fast at +startup and aggregates every validation problem into one error. + +| Variable | Required | Purpose | +| ----------------------------------------------- | ------------------- | -------------------------------------------------------------------------------------------------- | +| `RUNNER_ORG_SYNC_GITEA_URL` | yes | Base URL for Gitea admin API | +| `RUNNER_ORG_SYNC_ORGS_JSON_URL` | yes | URL of `altinn-orgs.json` | +| `RUNNER_ORG_SYNC_OUTPUT_NAMESPACE` | yes | Target namespace (e.g. `studio-runners`) | +| `RUNNER_ORG_SYNC_SECRET_NAME_PATTERN` | yes | Must contain the `{org}` placeholder, e.g. `altinn-gitea-runner-{org}-secret` | +| `RUNNER_ORG_SYNC_CONFIGMAP_NAME` | yes | e.g. 
`runner-org-list` | +| `RUNNER_ORG_SYNC_KEYVAULT_NAME` | if no env admin PAT | Azure Key Vault name (shared by both PATs) | +| `RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME` | if no env admin PAT | KV secret name holding the **admin** PAT | +| `RUNNER_ORG_SYNC_KEDA_PAT_KEYVAULT_SECRET_NAME` | if no env KEDA PAT | KV secret name holding the **read-only** PAT for KEDA | +| `RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME` | yes | Name of the K8s Secret to write the read-only PAT into (e.g. `keda-gitea-pat`) | +| `RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY` | yes | Data key inside that Secret (e.g. `token`) | +| `RUNNER_ORG_SYNC_SYNC_ALL` | no | `true` to skip the whitelist filter | +| `RUNNER_ORG_SYNC_ORGS` | if `SYNC_ALL=false` | CSV whitelist, e.g. `ttd,brg,dsb` | +| `RUNNER_ORG_SYNC_GITEA_PAT` | no | Local-dev bypass for admin PAT lookup. Source is logged at startup. | +| `RUNNER_ORG_SYNC_KEDA_PAT` | no | Local-dev bypass for KEDA PAT lookup. Source is logged at startup. | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | no | OTel collector endpoint (defaults via SDK) | +| `OTEL_SERVICE_NAME` | no | Defaults to `runner-org-sync` | +| `OTEL_RESOURCE_ATTRIBUTES` | no | e.g. 
`deployment.environment=dev` | +| `AZURE_*` | injected | Workload Identity webhook fills `AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_FEDERATED_TOKEN_FILE` | + +## Reconcile algorithm + +``` +adminPAT = loadPAT(env override → else KV) +kedaPAT = loadPAT(env override → else KV) + +desired = (orgs with non-empty environments) ∩ whitelist (or all if syncAll=true) +existing_secrets = managed Secrets in the output namespace + +for org in desired: + if Secret exists for org: + skip (preserve registered token) + else: + mint registration token via Gitea + create Secret + +for secret in existing_secrets: + if secret.org not in desired: + delete Secret + +apply ConfigMap with one entry per (desired ∩ orgs whose Secret now exists) + +apply Opaque Secret with key=token, value=kedaPAT + (no-op if existing value matches; update otherwise) +``` + +Existing per-org Secrets are never re-minted; this preserves any in-flight +runner registrations and avoids churn on ScaledJobs that already work. The +KEDA Secret IS updated when the KV value changes, so KV rotation propagates +automatically within one tick + KEDA's polling interval. Deletions remove +only the K8s Secret; orphaned Gitea-side runner records are left to go idle +(cleanup is a separate concern). + +## Observability + +This service emits OpenTelemetry traces and metrics; logs are kept thin and +intended for `kubectl logs` triage only. + +**Traces** — one root span per reconcile run, with per-stage children. Per-org +work surfaces as span events on the parent span (`org.token.minted`, +`org.secret.created`, `org.secret.deleted`, `org.skipped`). + +**Metrics** — see the package source for the canonical list. 
Highlights: + +- `runner_org_sync.reconcile.duration` (histogram, by outcome) +- `runner_org_sync.secrets.created` / `.deleted` / `.skipped` (counters) +- `runner_org_sync.org.reconcile_errors` (counter, by org and stage) — + **the signal worth paging on if sustained non-zero** +- `runner_org_sync.{gitea,cdn,keyvault}.call.duration` (histograms) + +**Logs** — JSON via `slog`, ~7 lines per healthy run plus any WARNs: + +``` +reconcile.start run_id=... +pat.loaded scope=admin source=keyvault len=40 +pat.loaded scope=keda source=keyvault len=40 +orgs.kept count=9 orgs=[ttd,brg,dsb,...] +org.reconcile.failed org=dsb stage=mint err=... (WARN, only on failure) +keda.secret.applied secret=keda-gitea-pat changed=true|false +reconcile.end duration_ms=... outcome=success|partial|failure +``` + +The `pat.loaded` lines surface the source per credential — accidental +fallback to env override in a non-local environment is immediately visible. +`keda.secret.applied changed=true` is the audit trail for a KV rotation +propagation. 
+ +## Local development + +Both PATs can come from env vars instead of Key Vault, sidestepping the +need for Azure auth on a laptop: + +```sh +export RUNNER_ORG_SYNC_GITEA_PAT='your-local-or-test-admin-pat' +export RUNNER_ORG_SYNC_KEDA_PAT='your-local-or-test-readonly-pat' +export RUNNER_ORG_SYNC_GITEA_URL='http://localhost:3000' +export RUNNER_ORG_SYNC_ORGS_JSON_URL='https://altinncdn.no/orgs/altinn-orgs.json' +export RUNNER_ORG_SYNC_OUTPUT_NAMESPACE='studio-runners' +export RUNNER_ORG_SYNC_SECRET_NAME_PATTERN='altinn-gitea-runner-{org}-secret' +export RUNNER_ORG_SYNC_CONFIGMAP_NAME='runner-org-list' +export RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME='keda-gitea-pat' +export RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY='token' +export RUNNER_ORG_SYNC_ORGS='ttd,brg' + +go run ./cmd/runner-org-sync +``` + +The startup log will read `pat.loaded scope=admin source=env` and +`pat.loaded scope=keda source=env`, making any accidental fallback in a +non-local environment immediately visible. + +## Testing + +```sh +make test # unit tests with race detector +make test-cover # coverage report at coverage.html +make lint # golangci-lint +``` + +Unit tests use stdlib `testing`, `net/http/httptest`, and +`k8s.io/client-go/kubernetes/fake`. No testify, no other test frameworks. + +End-to-end integration testing (kind-based, real Gitea + real KEDA, scenarios +TBD) is **in progress** — the approach is being designed alongside the +staging rollout rather than scaffolded up-front. Workload Identity is +Azure-specific and won't be covered by kind tests regardless; that path is +verified manually in a real cluster. 
diff --git a/src/runner-org-sync/cmd/runner-org-sync/main.go b/src/runner-org-sync/cmd/runner-org-sync/main.go new file mode 100644 index 00000000000..52af9e539c2 --- /dev/null +++ b/src/runner-org-sync/cmd/runner-org-sync/main.go @@ -0,0 +1,281 @@ +// Command runner-org-sync runs one reconcile cycle: discover orgs from the +// Altinn CDN, mint missing per-org Gitea runner registration tokens, delete +// Secrets for orgs that fell out of the desired set, and project the runners +// ConfigMap. It is designed to run as a Kubernetes CronJob; each invocation +// is a fresh pod that reads what it needs, performs the work, and exits. +package main + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "os/signal" + "syscall" + "time" + + "github.com/google/uuid" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/trace" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" + + "altinn.studio/runner-org-sync/internal/cdn" + "altinn.studio/runner-org-sync/internal/config" + "altinn.studio/runner-org-sync/internal/gitea" + "altinn.studio/runner-org-sync/internal/k8sstate" + "altinn.studio/runner-org-sync/internal/keyvault" + "altinn.studio/runner-org-sync/internal/reconcile" + "altinn.studio/runner-org-sync/internal/telemetry" +) + +const ( + telemetryShutdownTimeout = 10 * time.Second + serviceName = "runner-org-sync" +) + +func main() { + if err := run(); err != nil { + slog.Error("fatal", "err", err.Error()) + os.Exit(1) + } +} + +func run() error { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + + cfg, err := config.Load() + if err != nil { + return fmt.Errorf("config: %w", err) + } + + shutdown, err := telemetry.ConfigureOTel(ctx, serviceName) + if err != nil { + return fmt.Errorf("telemetry init: %w", err) + } + defer func() { + sctx, scancel := 
context.WithTimeout(context.Background(), telemetryShutdownTimeout) + defer scancel() + if err := shutdown(sctx); err != nil { + slog.Warn("telemetry shutdown returned error", "err", err.Error()) + } + }() + + metrics, err := telemetry.NewMetrics() + if err != nil { + return fmt.Errorf("telemetry metrics: %w", err) + } + + runID := uuid.NewString() + logger := slog.With("run_id", runID, "service", serviceName) + + pat, patSource, err := loadSecretFromKV(ctx, cfg.GiteaPATOverride, cfg.KeyVaultName, cfg.KeyVaultSecretName) + if err != nil { + return fmt.Errorf("load admin PAT: %w", err) + } + logger.Info("pat.loaded", "scope", "admin", "source", string(patSource), "len", len(pat)) + + kedaPAT, kedaPATSource, err := loadSecretFromKV(ctx, cfg.KedaPATOverride, cfg.KeyVaultName, cfg.KedaPATKeyVaultSecretName) + if err != nil { + return fmt.Errorf("load KEDA PAT: %w", err) + } + logger.Info("pat.loaded", "scope", "keda", "source", string(kedaPATSource), "len", len(kedaPAT)) + + k8sClient, err := buildK8sClient() + if err != nil { + return fmt.Errorf("build kubernetes client: %w", err) + } + store := k8sstate.NewStore(k8sClient, cfg.OutputNamespace) + giteaClient := gitea.NewClient(cfg.GiteaURL, pat) + cdnClient := cdn.NewClient(cfg.OrgsJSONURL) + + rec, err := reconcile.New(reconcile.Options{ + Source: cdnClient, + Minter: giteaClient, + Store: store, + SecretNameFor: cfg.SecretNameFor, + ConfigMapName: cfg.ConfigMapName, + Whitelist: cfg.WhitelistedOrgs, + SyncAll: cfg.SyncAll, + }) + if err != nil { + return fmt.Errorf("build reconciler: %w", err) + } + + ctx, span := telemetry.Tracer().Start(ctx, "runner_org_sync.reconcile", + trace.WithAttributes(attribute.String("run_id", runID)), + ) + defer span.End() + + logger.Info("reconcile.start") + start := time.Now() + report, runErr := rec.Run(ctx) + duration := time.Since(start) + + emitMetrics(ctx, metrics, report, duration) + addSpanEvents(span, report) + + if len(report.Desired) > 0 { + logger.Info("orgs.kept", "count", 
len(report.Desired), "orgs", report.Desired) + } + for _, f := range report.FailedOrgs { + logger.Warn("org.reconcile.failed", + "org", f.Org, "stage", f.Stage, "err", f.Err.Error()) + } + + logger.Info("reconcile.end", + "duration_ms", duration.Milliseconds(), + "outcome", string(report.Outcome), + "discovered", report.Discovered, + "desired", len(report.Desired), + "created", len(report.SecretsCreated), + "deleted", len(report.SecretsDeleted), + "skipped", len(report.SecretsSkipped), + "failed", len(report.FailedOrgs), + "configmap_changed", report.ConfigMapChanged, + ) + + // Independent of the per-org reconcile — runs even when its outcome is + // "partial" or fatal because the KEDA Secret has its own lifecycle. + // Failure is non-fatal: logged + counted, but the CronJob exit code is + // still driven by the org reconcile result so fatal reconcile errors stay + // visible to Kubernetes. + applyKedaSecret(ctx, store, cfg, kedaPAT, metrics, logger) + + if runErr != nil { + span.RecordError(runErr) + span.SetStatus(codes.Error, runErr.Error()) + return runErr + } + + if report.Outcome == reconcile.OutcomePartial { + // Continue-on-partial: still exit 0; metric + WARN log carries the signal. + span.SetStatus(codes.Ok, "partial") + } else { + span.SetStatus(codes.Ok, "success") + } + return nil +} + +// loadSecretFromKV resolves a single Key Vault secret, honouring an env-var +// override for local development. When override is non-empty it +// short-circuits without constructing the Azure SDK client; otherwise it +// fetches from KV via Workload Identity (DefaultAzureCredential). Generic +// over the value type — used today for the two Gitea PATs; any other +// KV-stored credential could reuse it. 
+func loadSecretFromKV(ctx context.Context, override, vaultName, vaultSecretName string) (string, keyvault.Source, error) { + var getter keyvault.Getter + if override == "" { + g, err := keyvault.NewAzureGetter(vaultName) + if err != nil { + return "", "", fmt.Errorf("build keyvault getter: %w", err) + } + getter = g + } + loader := keyvault.NewLoader(override, getter, vaultSecretName) + return loader.Load(ctx) +} + +// applyKedaSecret writes the KEDA read-only PAT into a single-key Opaque +// Secret in the output namespace and emits its own metric / span event / +// log line. Separated from the per-org reconcile because it has an +// independent lifecycle (sourced from KV, not from Gitea) and an +// independent failure model (non-fatal — next tick retries). +func applyKedaSecret( + ctx context.Context, + store *k8sstate.Store, + cfg config.Config, + value string, + metrics *telemetry.Metrics, + logger *slog.Logger, +) { + span := trace.SpanFromContext(ctx) + changed, err := store.ApplyOpaqueSecret(ctx, cfg.KedaPATSecretName, cfg.KedaPATSecretKey, value) + metrics.KedaSecretApplied.Add(ctx, 1, metric.WithAttributes( + attribute.Bool("changed", changed), + attribute.Bool("success", err == nil), + )) + if err != nil { + logger.Warn("keda.secret.apply.failed", "err", err.Error(), "secret", cfg.KedaPATSecretName) + span.AddEvent("keda.secret.apply.failed", trace.WithAttributes( + attribute.String("secret", cfg.KedaPATSecretName), + attribute.String("err", err.Error()), + )) + return + } + span.AddEvent("keda.secret.applied", trace.WithAttributes( + attribute.String("secret", cfg.KedaPATSecretName), + attribute.Bool("changed", changed), + )) + logger.Info("keda.secret.applied", "secret", cfg.KedaPATSecretName, "changed", changed) +} + +// buildK8sClient returns a clientset that prefers in-cluster config and +// falls back to a local kubeconfig (KUBECONFIG / $HOME/.kube/config) so a +// developer can run the binary directly against a kind cluster. 
+func buildK8sClient() (kubernetes.Interface, error) { + if cfg, err := rest.InClusterConfig(); err == nil { + return kubernetes.NewForConfig(cfg) + } else if !errors.Is(err, rest.ErrNotInCluster) { + return nil, fmt.Errorf("in-cluster config: %w", err) + } + loading := clientcmd.NewDefaultClientConfigLoadingRules() + kubeCfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loading, &clientcmd.ConfigOverrides{}).ClientConfig() + if err != nil { + return nil, fmt.Errorf("local kubeconfig: %w", err) + } + return kubernetes.NewForConfig(kubeCfg) +} + +func emitMetrics(ctx context.Context, m *telemetry.Metrics, r reconcile.Report, d time.Duration) { + outcomeAttr := attribute.String("outcome", string(r.Outcome)) + m.ReconcileDuration.Record(ctx, d.Seconds(), metric.WithAttributes(outcomeAttr)) + m.ReconcileRuns.Add(ctx, 1, metric.WithAttributes(outcomeAttr)) + m.OrgsDiscovered.Record(ctx, int64(r.Discovered)) + m.OrgsDesired.Record(ctx, int64(len(r.Desired))) + m.RecordFiltered(ctx, reconcile.FilterReasonNoEnv, len(r.FilteredNoEnv)) + m.RecordFiltered(ctx, reconcile.FilterReasonWhitelist, len(r.FilteredWhitelist)) + + for _, org := range r.SecretsCreated { + m.SecretsCreated.Add(ctx, 1, metric.WithAttributes(attribute.String("org", org))) + } + for _, org := range r.SecretsDeleted { + m.SecretsDeleted.Add(ctx, 1, metric.WithAttributes(attribute.String("org", org))) + } + m.SecretsSkipped.Add(ctx, int64(len(r.SecretsSkipped))) + + for _, f := range r.FailedOrgs { + m.OrgReconcileErrors.Add(ctx, 1, metric.WithAttributes( + attribute.String("org", f.Org), + attribute.String("stage", f.Stage), + )) + } + + m.ConfigMapApplied.Add(ctx, 1, metric.WithAttributes( + attribute.Bool("changed", r.ConfigMapChanged), + )) +} + +func addSpanEvents(span trace.Span, r reconcile.Report) { + for _, org := range r.SecretsCreated { + span.AddEvent("org.secret.created", trace.WithAttributes(attribute.String("org", org))) + } + for _, org := range r.SecretsDeleted { + 
span.AddEvent("org.secret.deleted", trace.WithAttributes(attribute.String("org", org))) + } + for _, org := range r.SecretsSkipped { + span.AddEvent("org.skipped", trace.WithAttributes(attribute.String("org", org))) + } + for _, f := range r.FailedOrgs { + span.AddEvent("org.reconcile.failed", trace.WithAttributes( + attribute.String("org", f.Org), + attribute.String("stage", f.Stage), + attribute.String("err", f.Err.Error()), + )) + } +} diff --git a/src/runner-org-sync/go.mod b/src/runner-org-sync/go.mod new file mode 100644 index 00000000000..6af881d65e1 --- /dev/null +++ b/src/runner-org-sync/go.mod @@ -0,0 +1,75 @@ +module altinn.studio/runner-org-sync + +go 1.26.0 + +require ( + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0 + github.com/google/uuid v1.6.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 + go.opentelemetry.io/otel/metric v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/sdk/metric v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 + k8s.io/api v0.36.0 + k8s.io/apimachinery v0.36.0 + k8s.io/client-go v0.36.0 +) + +require ( + github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect + github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect + github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 // indirect + github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emicklei/go-restful/v3 v3.13.0 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + 
github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/golang-jwt/jwt/v5 v5.3.0 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect + github.com/spf13/pflag v1.0.9 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect + go.yaml.in/yaml/v2 v2.4.3 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.49.0 // indirect + golang.org/x/net v0.52.0 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/term v0.41.0 // indirect + golang.org/x/text v0.35.0 // indirect + golang.org/x/time v0.14.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect + google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af // indirect + gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.140.0 // indirect + k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a // 
indirect + k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 // indirect + sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.2 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) diff --git a/src/runner-org-sync/go.sum b/src/runner-org-sync/go.sum new file mode 100644 index 00000000000..c13d2c1cea7 --- /dev/null +++ b/src/runner-org-sync/go.sum @@ -0,0 +1,183 @@ +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 h1:JXg2dwJUmPB9JmtVmdEB16APJ7jurfbY5jnfXpJoRMc= +github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0/go.mod h1:YD5h/ldMsG0XiIw7PdyNhLxaM317eFh5yNLccNfGdyw= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1 h1:Hk5QBxZQC1jb2Fwj6mpzme37xbCDdNTxU7O9eb5+LB4= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.1/go.mod h1:IYus9qsFobWIc2YVwe/WPjcnyCkPKtnHAqUYeebc8z0= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= +github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 h1:9iefClla7iYpfYWdzPCRDozdmndjTm8DXdpCzPajMgA= +github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2/go.mod h1:XtLgD3ZD34DAaVIIAyG3objl5DynM3CQ/vMcbBNJZGI= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0 h1:/g8S6wk65vfC6m3FIxJ+i5QDyN9JWwXI8Hb0Img10hU= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets v1.4.0/go.mod h1:gpl+q95AzZlKVI3xSoseF9QPrypk0hQqBiJYeB/cR/I= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0 h1:nCYfgcSyHZXJI8J0IWE5MsCGlb2xp9fJiXyxWgmOFg4= +github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/internal v1.2.0/go.mod h1:ucUjca2JtSZboY8IoUqyQyuuXvwbMBVwFOm0vdQPNhA= +github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1 h1:WJTmL004Abzc5wDB5VtZG2PJk5ndYDgVacGqfirKxjM= 
+github.com/AzureAD/microsoft-authentication-extensions-for-go/cache v0.1.1/go.mod h1:tCcJZ0uHAmvjsVYzEFivsRTN00oz5BEsRgQHu5JZ9WE= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 h1:XRzhVemXdgvJqCH0sFfrBUTnUJSBrBf7++ypk+twtRs= +github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0/go.mod h1:HKpQxkWaGLJ+D/5H8QRpyQXA1eKjxkFlOMwck5+33Jk= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= +github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/emicklei/go-restful/v3 v3.13.0 h1:C4Bl2xDndpU6nJ4bc1jXd+uTmYPVUwkD6bFY/oTyCes= +github.com/emicklei/go-restful/v3 v3.13.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod 
h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= +github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= +github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/gnostic-models v0.7.0 h1:qwTtogB15McXDaNqTZdzPJRHvaVJlAl+HVQnLmJEJxo= +github.com/google/gnostic-models v0.7.0/go.mod h1:whL5G0m6dmc5cPxKc5bdKdEN3UjI7OUGxBlw57miDrQ= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/josharian/intern v1.0.0 
h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= +github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/keybase/go-keychain v0.0.1 h1:way+bWYa6lDppZoZcgMbYsvC7GxljxrskdNInRtuthU= +github.com/keybase/go-keychain v0.0.1/go.mod h1:PdEILRW3i9D8JcdM+FmY6RwkHGnhHxXwkPPMeUgOK1k= +github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= +github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee h1:W5t00kpgFdJifH4BDsTlE89Zl93FEloxaWZfGcifgq8= 
+github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= +github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= +github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= +github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify 
v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 h1:8UQVDcZxOJLtX6gxtDt3vY2WTgvZqMQRzjsqiIHQdkc= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0/go.mod h1:2lmweYCiHYpEjQ/lSJBYhj9jP1zvCvQW4BqL9dnT7FQ= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 h1:RAE+JPfvEmvy+0LzyUA25/SGawPwIUbZ6u0Wug54sLc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0/go.mod h1:AGmbycVGEsRx9mXMZ75CsOyhSP6MFIcj/6dnG+vhVjk= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= +go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod 
h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.yaml.in/yaml/v2 v2.4.3 h1:6gvOSjQoTB3vt1l+CU+tSyi/HOjfOjRLJ4YwYZGwRO0= +go.yaml.in/yaml/v2 v2.4.3/go.mod h1:zSxWcmIDjOzPXpjlTTbAsKokqkDNAVtZO0WOMiT90s8= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4= +golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= +golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= 
+golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af h1:+5/Sw3GsDNlEmu7TfklWKPdQ0Ykja5VEmq2i817+jbI= +google.golang.org/protobuf v1.36.12-0.20260120151049-f2248ac996af/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/evanphx/json-patch.v4 v4.13.0 h1:czT3CmqEaQ1aanPc5SdlgQrrEIb8w/wwCvWWnfEbYzo= +gopkg.in/evanphx/json-patch.v4 v4.13.0/go.mod h1:p8EYWUEYMpynmqDbY58zCKCFZw8pRWMG4EsWvDvM72M= +gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= +gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +k8s.io/api v0.36.0 h1:SgqDhZzHdOtMk40xVSvCXkP9ME0H05hPM3p9AB1kL80= +k8s.io/api v0.36.0/go.mod h1:m1LVrGPNYax5NBHdO+QuAedXyuzTt4RryI/qnmNvs34= +k8s.io/apimachinery v0.36.0 h1:jZyPzhd5Z+3h9vJLt0z9XdzW9VzNzWAUw+P1xZ9PXtQ= +k8s.io/apimachinery v0.36.0/go.mod h1:FklypaRJt6n5wUIwWXIP6GJlIpUizTgfo1T/As+Tyxc= +k8s.io/client-go v0.36.0 h1:pOYi7C4RHChYjMiHpZSpSbIM6ZxVbRXBy7CuiIwqA3c= +k8s.io/client-go v0.36.0/go.mod h1:ZKKcpwF0aLYfkHFCjillCKaTK/yBkEDHTDXCFY6AS9Y= +k8s.io/klog/v2 v2.140.0 h1:Tf+J3AH7xnUzZyVVXhTgGhEKnFqye14aadWv7bzXdzc= +k8s.io/klog/v2 v2.140.0/go.mod h1:o+/RWfJ6PwpnFn7OyAG3QnO47BFsymfEfrz6XyYSSp0= +k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a h1:xCeOEAOoGYl2jnJoHkC3hkbPJgdATINPMAxaynU2Ovg= +k8s.io/kube-openapi v0.0.0-20260317180543-43fb72c5454a/go.mod h1:uGBT7iTA6c6MvqUvSXIaYZo9ukscABYi2btjhvgKGZ0= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2 h1:AZYQSJemyQB5eRxqcPky+/7EdBj0xi3g0ZcxxJ7vbWU= +k8s.io/utils v0.0.0-20260210185600-b8788abfbbc2/go.mod h1:xDxuJ0whA3d0I4mf/C4ppKHxXynQ+fxnkmQH0vTHnuk= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 h1:IpInykpT6ceI+QxKBbEflcR5EXP7sU1kvOlxwZh5txg= +sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= +sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= +sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2 h1:kwVWMx5yS1CrnFWA/2QHyRVJ8jM6dBA80uLmm0wJkk8= +sigs.k8s.io/structured-merge-diff/v6 v6.3.2/go.mod h1:M3W8sfWvn2HhQDIbGWj3S099YozAsymCo/wrT5ohRUE= +sigs.k8s.io/yaml v1.6.0 h1:G8fkbMSAFqgEFgh4b1wmtzDnioxFCUgTZhlbj5P9QYs= +sigs.k8s.io/yaml v1.6.0/go.mod h1:796bPqUfzR/0jLAl6XjHl3Ck7MiyVv8dbTdyT3/pMf4= diff --git a/src/runner-org-sync/infra/kustomize/base/cronjob.yaml b/src/runner-org-sync/infra/kustomize/base/cronjob.yaml new file mode 100644 index 00000000000..e0f6c35e7b0 
--- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/cronjob.yaml @@ -0,0 +1,100 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync + labels: + app: runner-org-sync + annotations: + altinn.studio/image: "altinn-studio/runner-org-sync:latest" + altinn.studio/image-tag: "latest" +spec: + # Every 15 minutes, aligned to wall-clock quarters. + schedule: "*/15 * * * *" + + # Avoid overlap. A reconcile takes seconds; if a previous run somehow is + # still going, skipping is correct — the next tick has the latest state. + concurrencyPolicy: Forbid + + # If the controller misses a window (e.g. AKS upgrade), do not back-fill. + # The next scheduled run does the right thing on its own. + startingDeadlineSeconds: 60 + + successfulJobsHistoryLimit: 3 + failedJobsHistoryLimit: 3 + + jobTemplate: + spec: + backoffLimit: 0 + # Job-level deadline so a wedged pod cannot pile up next to a fresh one. + activeDeadlineSeconds: 300 + template: + metadata: + labels: + app: runner-org-sync + # Tells the Workload Identity webhook to inject the federated + # token volume and AZURE_* env vars into this pod. 
+ azure.workload.identity/use: "true" + spec: + serviceAccountName: runner-org-sync + restartPolicy: Never + terminationGracePeriodSeconds: 30 + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + fsGroup: 65532 + seccompProfile: + type: RuntimeDefault + containers: + - name: runner-org-sync + image: altinn-studio/runner-org-sync:latest + imagePullPolicy: IfNotPresent + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + privileged: false + capabilities: + drop: ["ALL"] + env: + - name: RUNNER_ORG_SYNC_GITEA_URL + value: "http://altinn-repositories-public.default.svc.cluster.local" + - name: RUNNER_ORG_SYNC_ORGS_JSON_URL + value: "https://altinncdn.no/orgs/altinn-orgs.json" + - name: RUNNER_ORG_SYNC_OUTPUT_NAMESPACE + value: "studio-runners" + - name: RUNNER_ORG_SYNC_SECRET_NAME_PATTERN + value: "altinn-gitea-runner-{org}-secret" + - name: RUNNER_ORG_SYNC_CONFIGMAP_NAME + value: "runner-org-list" + - name: RUNNER_ORG_SYNC_SYNC_ALL + value: "false" + - name: RUNNER_ORG_SYNC_ORGS + value: "${RUNNER_ORG_SYNC_ORGS}" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "${RUNNER_ORG_SYNC_KEYVAULT_NAME}" + - name: RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME + value: "gitea-admin-pat" + # KEDA PAT projection: read-only Gitea PAT fetched from the + # same Key Vault (different secret) and written as an Opaque + # K8s Secret for KEDA's TriggerAuthentication. 
+ - name: RUNNER_ORG_SYNC_KEDA_PAT_KEYVAULT_SECRET_NAME + value: "gitea-keda-pat" + - name: RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME + value: "keda-gitea-pat" + - name: RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY + value: "token" + - name: OTEL_SERVICE_NAME + value: "runner-org-sync" + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://otel-router.observability.svc.cluster.local:4317" + - name: OTEL_EXPORTER_OTLP_PROTOCOL + value: "grpc" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=${ENVIRONMENT}" + resources: + requests: + cpu: 50m + memory: 64Mi + limits: + cpu: 500m + memory: 256Mi diff --git a/src/runner-org-sync/infra/kustomize/base/kustomization.yaml b/src/runner-org-sync/infra/kustomize/base/kustomization.yaml new file mode 100644 index 00000000000..34b922e7c05 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/kustomization.yaml @@ -0,0 +1,26 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: studio-runners + +resources: + - serviceaccount.yaml + - role.yaml + - rolebinding.yaml + - cronjob.yaml + - triggerauthentication.yaml + - networkpolicy.yaml + +# Copy the image annotation onto the container spec. The annotation value +# is itself substituted at deploy time by Flux post-build substitution. +replacements: + - source: + kind: CronJob + name: runner-org-sync + fieldPath: metadata.annotations.[altinn.studio/image] + targets: + - select: + kind: CronJob + name: runner-org-sync + fieldPaths: + - spec.jobTemplate.spec.template.spec.containers.[name=runner-org-sync].image diff --git a/src/runner-org-sync/infra/kustomize/base/networkpolicy.yaml b/src/runner-org-sync/infra/kustomize/base/networkpolicy.yaml new file mode 100644 index 00000000000..dfad42b33ed --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/networkpolicy.yaml @@ -0,0 +1,29 @@ +# Egress allowance for the OTel collector in the `observability` namespace. 
+# +# The studio-runners namespace has a cluster-wide default-deny egress policy +# (managed in altinn-studio-infra/provisioning/studio-runners-infra.tf) which +# whitelists DNS + Gitea + external internet, but not observability. Without +# this additional rule, runner-org-sync's OTLP exporter times out at pod +# exit and the run logs a `telemetry shutdown returned error` WARN. +# +# NetworkPolicies are additive: this policy adds to the studio-runners base +# policy rather than replacing it. Scoped via `app: runner-org-sync` so only +# this service gets the extra egress — tenant runner pods stay locked down. +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: runner-org-sync-otel-egress +spec: + podSelector: + matchLabels: + app: runner-org-sync + policyTypes: + - Egress + egress: + - to: + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: observability + ports: + - port: 4317 + protocol: TCP diff --git a/src/runner-org-sync/infra/kustomize/base/role.yaml b/src/runner-org-sync/infra/kustomize/base/role.yaml new file mode 100644 index 00000000000..9677f3d1e5f --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/role.yaml @@ -0,0 +1,20 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: runner-org-sync +rules: + # Per-org registration-token Secrets and the KEDA PAT Secret: + # - list to inventory managed registration Secrets, + # - get to check existence, + # - create on onboarding / first KEDA Secret write, + # - update for the KEDA Secret when its KV value rotates, + # - delete on offboarding. + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list", "create", "update", "delete"] + + # Runner-org-list ConfigMap: get to detect drift, create on first run, + # update on subsequent changes. 
+ - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "create", "update"] diff --git a/src/runner-org-sync/infra/kustomize/base/rolebinding.yaml b/src/runner-org-sync/infra/kustomize/base/rolebinding.yaml new file mode 100644 index 00000000000..ef1d9999b45 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/rolebinding.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: runner-org-sync +subjects: + - kind: ServiceAccount + name: runner-org-sync +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: runner-org-sync diff --git a/src/runner-org-sync/infra/kustomize/base/serviceaccount.yaml b/src/runner-org-sync/infra/kustomize/base/serviceaccount.yaml new file mode 100644 index 00000000000..1f6c27d7416 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/serviceaccount.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + # Keep this name stable: the Azure Entra federated credential subject is + # bound to ServiceAccount name + namespace. Renaming this SA invalidates + # Workload Identity until the federated credential is updated to match. + name: runner-org-sync + annotations: + azure.workload.identity/client-id: "${RUNNER_ORG_SYNC_ENTRA_CLIENT_ID}" diff --git a/src/runner-org-sync/infra/kustomize/base/triggerauthentication.yaml b/src/runner-org-sync/infra/kustomize/base/triggerauthentication.yaml new file mode 100644 index 00000000000..755cdddc24a --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/base/triggerauthentication.yaml @@ -0,0 +1,17 @@ +# KEDA TriggerAuthentication for the github-runner scaler. +# +# The .name + .secretTargetRef[0].name + .secretTargetRef[0].key fields here +# are pinned to the same values runner-org-sync writes via env on its CronJob +# (RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME / _SECRET_KEY) and the same name the +# workload chart references (charts/gitea-org-runner values.yaml's +# keda.authenticationRef.name). 
If you rename any of them, change all four +# places at once or KEDA scalers will fail with "auth ref not found". +apiVersion: keda.sh/v1alpha1 +kind: TriggerAuthentication +metadata: + name: keda-gitea-auth +spec: + secretTargetRef: + - parameter: personalAccessToken + name: keda-gitea-pat + key: token diff --git a/src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml b/src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml new file mode 100644 index 00000000000..cef86f098b3 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/dev/cronjob-env.patch.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: runner-org-sync + env: + - name: RUNNER_ORG_SYNC_ORGS + value: "ttd" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "altinn-studio-dev-kv" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=dev" diff --git a/src/runner-org-sync/infra/kustomize/dev/kustomization.yaml b/src/runner-org-sync/infra/kustomize/dev/kustomization.yaml new file mode 100644 index 00000000000..bb34589ad59 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/dev/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../base + +patches: + - path: cronjob-env.patch.yaml diff --git a/src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml b/src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml new file mode 100644 index 00000000000..06f01b2d23c --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/prod/cronjob-env.patch.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: runner-org-sync + env: + - name: RUNNER_ORG_SYNC_ORGS + value: "ttd,brg,dsb,ssb,ksdigi,pat,dibk,skm,sfvt" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + 
value: "altinn-studio-prod-kv" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=prod" diff --git a/src/runner-org-sync/infra/kustomize/prod/kustomization.yaml b/src/runner-org-sync/infra/kustomize/prod/kustomization.yaml new file mode 100644 index 00000000000..bb34589ad59 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/prod/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../base + +patches: + - path: cronjob-env.patch.yaml diff --git a/src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml b/src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml new file mode 100644 index 00000000000..b94cc3aec06 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/staging/cronjob-env.patch.yaml @@ -0,0 +1,18 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: runner-org-sync +spec: + jobTemplate: + spec: + template: + spec: + containers: + - name: runner-org-sync + env: + - name: RUNNER_ORG_SYNC_ORGS + value: "ttd" + - name: RUNNER_ORG_SYNC_KEYVAULT_NAME + value: "altinn-studio-staging-kv" + - name: OTEL_RESOURCE_ATTRIBUTES + value: "service.namespace=studio-runners,deployment.environment=staging" diff --git a/src/runner-org-sync/infra/kustomize/staging/kustomization.yaml b/src/runner-org-sync/infra/kustomize/staging/kustomization.yaml new file mode 100644 index 00000000000..bb34589ad59 --- /dev/null +++ b/src/runner-org-sync/infra/kustomize/staging/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../base + +patches: + - path: cronjob-env.patch.yaml diff --git a/src/runner-org-sync/internal/cdn/cdn.go b/src/runner-org-sync/internal/cdn/cdn.go new file mode 100644 index 00000000000..13c97f5ca43 --- /dev/null +++ b/src/runner-org-sync/internal/cdn/cdn.go @@ -0,0 +1,108 @@ +// Package cdn fetches and decodes the Altinn organisations document from +// 
const (
	// defaultTimeout is the http.Client timeout used when the caller does
	// not supply its own client via WithHTTPClient.
	defaultTimeout = 30 * time.Second
	// defaultUserAgent is sent unless overridden with WithUserAgent.
	defaultUserAgent = "runner-org-sync"
	// maxErrorBody caps how much of a non-200 response body is quoted in
	// the returned error.
	maxErrorBody = 512
	// maxSuccessBody caps the JSON-decode read so a pathological CDN
	// response cannot exhaust pod memory. altinn-orgs.json is ~100 KB
	// today; 10 MiB is generous and far below the pod's memory limit.
	maxSuccessBody = 10 << 20 // 10 MiB
)

var (
	// ErrUnexpectedStatus is returned when the CDN responds with any status
	// other than 200 OK.
	ErrUnexpectedStatus = errors.New("cdn: unexpected status")

	// ErrMissingOrgs is returned when the response decodes as JSON but has
	// no top-level "orgs" object (absent key or null). An explicitly empty
	// object ({"orgs": {}}) is NOT an error.
	ErrMissingOrgs = errors.New(`cdn: document has no "orgs" object`)
)

// Org is one entry from altinn-orgs.json. Code is the map key from the
// document (populated by Fetch, not by the JSON decoder).
type Org struct {
	Code         string            `json:"-"`
	Name         map[string]string `json:"name"`
	Orgnr        string            `json:"orgnr"`
	Environments []string          `json:"environments"`
}

// Client fetches the orgs document. Construct it with NewClient; the zero
// value has no HTTP client and is not usable.
type Client struct {
	httpClient *http.Client
	url        string
	userAgent  string
}

// Option configures a Client.
type Option func(*Client)

// WithHTTPClient overrides the default HTTP client (use in tests with
// httptest). A nil client is ignored so the default client stays in place
// instead of causing a nil dereference in Fetch.
func WithHTTPClient(h *http.Client) Option {
	return func(c *Client) {
		if h != nil {
			c.httpClient = h
		}
	}
}

// WithUserAgent overrides the outgoing User-Agent header.
func WithUserAgent(ua string) Option {
	return func(c *Client) { c.userAgent = ua }
}

// NewClient constructs a Client targeting the given URL. Options are applied
// in order on top of the defaults (30 s timeout, "runner-org-sync" agent).
func NewClient(url string, opts ...Option) *Client {
	c := &Client{
		httpClient: &http.Client{Timeout: defaultTimeout},
		url:        url,
		userAgent:  defaultUserAgent,
	}
	for _, opt := range opts {
		opt(c)
	}
	return c
}

// Fetch retrieves the orgs document and returns one Org per entry with the
// org code populated from the map key. Order is not stable.
//
// Errors:
//   - request construction and transport failures are wrapped with %w, so
//     context cancellation/deadline errors remain detectable via errors.Is;
//   - any status other than 200 yields ErrUnexpectedStatus, with the status
//     code and up to maxErrorBody bytes of the body in the message;
//   - a body that is not valid JSON yields a "cdn: decode body" error;
//   - a JSON body without an "orgs" object yields ErrMissingOrgs, so that a
//     malformed-but-parseable response is never mistaken for "no orgs".
func (c *Client) Fetch(ctx context.Context) ([]Org, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.url, nil)
	if err != nil {
		return nil, fmt.Errorf("cdn: build request: %w", err)
	}
	req.Header.Set("User-Agent", c.userAgent)
	req.Header.Set("Accept", "application/json")

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("cdn: get %s: %w", c.url, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: the excerpt only enriches the error message, so a
		// failed read simply produces an empty excerpt.
		body, _ := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		return nil, fmt.Errorf("%w %d from %s: %s", ErrUnexpectedStatus, resp.StatusCode, c.url, string(body))
	}

	var doc struct {
		Orgs map[string]Org `json:"orgs"`
	}
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxSuccessBody)).Decode(&doc); err != nil {
		return nil, fmt.Errorf("cdn: decode body: %w", err)
	}
	// encoding/json allocates the map for any JSON object (even "{}"), so a
	// nil map here means the key was absent or null.
	if doc.Orgs == nil {
		return nil, fmt.Errorf("%w (from %s)", ErrMissingOrgs, c.url)
	}

	orgs := make([]Org, 0, len(doc.Orgs))
	for code, o := range doc.Orgs {
		o.Code = code
		orgs = append(orgs, o)
	}
	return orgs, nil
}
"Brønnøysundregistrene", "nb": "Brønnøysundregistrene"}, + "orgnr": "974760673", + "environments": ["tt02", "production"] + }, + "acn": { + "name": {"en": "ACN Test org"}, + "orgnr": "999999990", + "environments": [] + } + } +}` + +func newStubServer(t *testing.T, handler http.HandlerFunc) *httptest.Server { + t.Helper() + s := httptest.NewServer(handler) + t.Cleanup(s.Close) + return s +} + +func TestFetch_Happy(t *testing.T) { + var gotUA, gotAccept, gotMethod string + s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotUA = r.Header.Get("User-Agent") + gotAccept = r.Header.Get("Accept") + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(sampleOrgsJSON)) + }) + + c := NewClient(s.URL, WithUserAgent("test-agent")) + orgs, err := c.Fetch(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got, want := len(orgs), 3; got != want { + t.Fatalf("len(orgs) = %d, want %d", got, want) + } + + // Index for deterministic assertions (map iteration is random). 
+ byCode := indexByCode(orgs) + + ttd, ok := byCode["ttd"] + if !ok { + t.Fatal("ttd missing from result") + } + if got, want := ttd.Code, "ttd"; got != want { + t.Errorf("ttd.Code = %q, want %q", got, want) + } + if got, want := ttd.Orgnr, "991825827"; got != want { + t.Errorf("ttd.Orgnr = %q, want %q", got, want) + } + if got, want := ttd.Environments, []string{"tt02", "production"}; !equalSlice(got, want) { + t.Errorf("ttd.Environments = %v, want %v", got, want) + } + acn := byCode["acn"] + if len(acn.Environments) != 0 { + t.Errorf("acn.Environments = %v, want empty", acn.Environments) + } + + if gotMethod != http.MethodGet { + t.Errorf("HTTP method = %q, want GET", gotMethod) + } + if gotUA != "test-agent" { + t.Errorf("User-Agent = %q, want test-agent", gotUA) + } + if !strings.Contains(gotAccept, "application/json") { + t.Errorf("Accept = %q does not contain application/json", gotAccept) + } +} + +func TestFetch_EmptyOrgs(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"orgs": {}}`)) + }) + c := NewClient(s.URL) + orgs, err := c.Fetch(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(orgs) != 0 { + t.Errorf("len(orgs) = %d, want 0", len(orgs)) + } +} + +func TestFetch_MalformedJSON(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"orgs": this is not json}`)) + }) + c := NewClient(s.URL) + _, err := c.Fetch(context.Background()) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "decode") { + t.Errorf("error should mention decode failure; got: %v", err) + } +} + +func TestFetch_Non200(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusServiceUnavailable) + _, _ = w.Write([]byte("upstream is down")) + }) + c := NewClient(s.URL) + _, err := c.Fetch(context.Background()) + if err == nil { 
+ t.Fatal("expected error, got nil") + } + if !errors.Is(err, ErrUnexpectedStatus) { + t.Errorf("expected ErrUnexpectedStatus, got %v", err) + } +} + +func TestFetch_ContextCancelled(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(200 * time.Millisecond) + _, _ = w.Write([]byte(sampleOrgsJSON)) + }) + c := NewClient(s.URL) + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) + defer cancel() + _, err := c.Fetch(ctx) + if err == nil { + t.Fatal("expected error, got nil") + } +} + +func indexByCode(orgs []Org) map[string]Org { + m := make(map[string]Org, len(orgs)) + for _, o := range orgs { + m[o.Code] = o + } + return m +} + +func equalSlice(a, b []string) bool { + if len(a) != len(b) { + return false + } + ac, bc := append([]string(nil), a...), append([]string(nil), b...) + sort.Strings(ac) + sort.Strings(bc) + for i := range ac { + if ac[i] != bc[i] { + return false + } + } + return true +} diff --git a/src/runner-org-sync/internal/config/config.go b/src/runner-org-sync/internal/config/config.go new file mode 100644 index 00000000000..c946a62a887 --- /dev/null +++ b/src/runner-org-sync/internal/config/config.go @@ -0,0 +1,168 @@ +// Package config loads and validates the runner-org-sync runtime configuration +// from environment variables. The loader is fail-fast and aggregates all +// invalid/missing values into a single error so a misconfigured CronJob +// surfaces every problem in one run, not one per restart. 
+package config + +import ( + "errors" + "fmt" + "os" + "strings" +) + +const ( + EnvGiteaURL = "RUNNER_ORG_SYNC_GITEA_URL" + EnvOrgsJSONURL = "RUNNER_ORG_SYNC_ORGS_JSON_URL" + EnvOutputNamespace = "RUNNER_ORG_SYNC_OUTPUT_NAMESPACE" + EnvSecretNamePattern = "RUNNER_ORG_SYNC_SECRET_NAME_PATTERN" + EnvConfigMapName = "RUNNER_ORG_SYNC_CONFIGMAP_NAME" + EnvKeyVaultName = "RUNNER_ORG_SYNC_KEYVAULT_NAME" + EnvKeyVaultSecretName = "RUNNER_ORG_SYNC_KEYVAULT_SECRET_NAME" + EnvSyncAll = "RUNNER_ORG_SYNC_SYNC_ALL" + EnvWhitelistedOrgs = "RUNNER_ORG_SYNC_ORGS" + EnvGiteaPATOverride = "RUNNER_ORG_SYNC_GITEA_PAT" + + // KEDA PAT projection: read-only Gitea PAT consumed by KEDA's + // github-runner scaler. Fetched from the same Key Vault as the admin + // PAT, written to a K8s Secret in OutputNamespace. + EnvKedaPATKeyVaultSecretName = "RUNNER_ORG_SYNC_KEDA_PAT_KEYVAULT_SECRET_NAME" + EnvKedaPATSecretName = "RUNNER_ORG_SYNC_KEDA_PAT_SECRET_NAME" + EnvKedaPATSecretKey = "RUNNER_ORG_SYNC_KEDA_PAT_SECRET_KEY" + EnvKedaPATOverride = "RUNNER_ORG_SYNC_KEDA_PAT" + + // OrgPlaceholder is the substring SecretNamePattern must contain; + // it is substituted with the org code at apply time. + OrgPlaceholder = "{org}" +) + +// Config holds validated runtime settings. Construct only via Load/LoadFrom. +type Config struct { + GiteaURL string + OrgsJSONURL string + OutputNamespace string + SecretNamePattern string + ConfigMapName string + KeyVaultName string + KeyVaultSecretName string + SyncAll bool + WhitelistedOrgs []string + GiteaPATOverride string + + // KEDA PAT projection settings. KedaPATKeyVaultSecretName is the secret + // name in Azure Key Vault; KedaPATSecretName/Key control the destination + // K8s Secret. KedaPATOverride is a local-dev bypass mirroring GiteaPATOverride. 
+ KedaPATKeyVaultSecretName string + KedaPATSecretName string + KedaPATSecretKey string + KedaPATOverride string +} + +// Getter abstracts os.Getenv so tests can inject a fake environment without +// mutating the process global state. +type Getter func(key string) string + +// Load reads configuration from the process environment. +func Load() (Config, error) { + return LoadFrom(os.Getenv) +} + +// LoadFrom reads configuration using the supplied getter and validates it. +// Every failure is collected and reported in a single joined error. +func LoadFrom(get Getter) (Config, error) { + cfg := Config{ + GiteaURL: strings.TrimSpace(get(EnvGiteaURL)), + OrgsJSONURL: strings.TrimSpace(get(EnvOrgsJSONURL)), + OutputNamespace: strings.TrimSpace(get(EnvOutputNamespace)), + SecretNamePattern: strings.TrimSpace(get(EnvSecretNamePattern)), + ConfigMapName: strings.TrimSpace(get(EnvConfigMapName)), + KeyVaultName: strings.TrimSpace(get(EnvKeyVaultName)), + KeyVaultSecretName: strings.TrimSpace(get(EnvKeyVaultSecretName)), + SyncAll: parseBool(get(EnvSyncAll)), + WhitelistedOrgs: parseCSV(get(EnvWhitelistedOrgs)), + GiteaPATOverride: strings.TrimSpace(get(EnvGiteaPATOverride)), + + KedaPATKeyVaultSecretName: strings.TrimSpace(get(EnvKedaPATKeyVaultSecretName)), + KedaPATSecretName: strings.TrimSpace(get(EnvKedaPATSecretName)), + KedaPATSecretKey: strings.TrimSpace(get(EnvKedaPATSecretKey)), + KedaPATOverride: strings.TrimSpace(get(EnvKedaPATOverride)), + } + + var errs []error + requireField(&errs, EnvGiteaURL, cfg.GiteaURL) + requireField(&errs, EnvOrgsJSONURL, cfg.OrgsJSONURL) + requireField(&errs, EnvOutputNamespace, cfg.OutputNamespace) + requireField(&errs, EnvSecretNamePattern, cfg.SecretNamePattern) + requireField(&errs, EnvConfigMapName, cfg.ConfigMapName) + requireField(&errs, EnvKedaPATSecretName, cfg.KedaPATSecretName) + requireField(&errs, EnvKedaPATSecretKey, cfg.KedaPATSecretKey) + + if cfg.SecretNamePattern != "" && !strings.Contains(cfg.SecretNamePattern, 
OrgPlaceholder) { + errs = append(errs, fmt.Errorf("%s must contain the %q placeholder", EnvSecretNamePattern, OrgPlaceholder)) + } + + // Admin PAT must be reachable either via override (local dev) or via Key Vault (in-cluster). + if cfg.GiteaPATOverride == "" { + requireField(&errs, EnvKeyVaultName, cfg.KeyVaultName) + requireField(&errs, EnvKeyVaultSecretName, cfg.KeyVaultSecretName) + } + + // KEDA PAT has the same shape: override or KV-secret-name. KeyVaultName is + // shared with the admin PAT (one vault, multiple secrets), so it's already + // validated above. + if cfg.KedaPATOverride == "" { + requireField(&errs, EnvKedaPATKeyVaultSecretName, cfg.KedaPATKeyVaultSecretName) + } + + // Either syncAll=true or a non-empty whitelist. An empty intersection is + // almost certainly a misconfiguration, not an intended "sync nothing". + if !cfg.SyncAll && len(cfg.WhitelistedOrgs) == 0 { + errs = append(errs, fmt.Errorf("either %s=true or %s must be a non-empty CSV list", EnvSyncAll, EnvWhitelistedOrgs)) + } + + if len(errs) > 0 { + return Config{}, fmt.Errorf("invalid configuration: %w", errors.Join(errs...)) + } + return cfg, nil +} + +// SecretNameFor renders SecretNamePattern for the given org code. 
+func (c Config) SecretNameFor(org string) string { + return strings.ReplaceAll(c.SecretNamePattern, OrgPlaceholder, org) +} + +func requireField(errs *[]error, name, value string) { + if value == "" { + *errs = append(*errs, fmt.Errorf("%s is required", name)) + } +} + +func parseBool(raw string) bool { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "1", "t", "true", "yes", "y": + return true + default: + return false + } +} + +func parseCSV(raw string) []string { + if raw == "" { + return nil + } + parts := strings.Split(raw, ",") + out := make([]string, 0, len(parts)) + seen := make(map[string]struct{}, len(parts)) + for _, p := range parts { + p = strings.TrimSpace(p) + if p == "" { + continue + } + if _, dup := seen[p]; dup { + continue + } + seen[p] = struct{}{} + out = append(out, p) + } + return out +} diff --git a/src/runner-org-sync/internal/config/config_test.go b/src/runner-org-sync/internal/config/config_test.go new file mode 100644 index 00000000000..33fbb107bce --- /dev/null +++ b/src/runner-org-sync/internal/config/config_test.go @@ -0,0 +1,223 @@ +package config + +import ( + "strings" + "testing" +) + +// validEnv returns a baseline env map representing a fully valid configuration. +// Tests mutate a copy to exercise one validation branch at a time. 
+func validEnv() map[string]string { + return map[string]string{ + EnvGiteaURL: "http://gitea.local", + EnvOrgsJSONURL: "https://altinncdn.no/orgs/altinn-orgs.json", + EnvOutputNamespace: "studio-runners", + EnvSecretNamePattern: "altinn-gitea-runner-{org}-secret", + EnvConfigMapName: "runner-org-list", + EnvKeyVaultName: "kv-studio", + EnvKeyVaultSecretName: "gitea-admin-pat", + EnvWhitelistedOrgs: "ttd,brg,dsb", + EnvKedaPATKeyVaultSecretName: "gitea-keda-pat", + EnvKedaPATSecretName: "keda-gitea-pat", + EnvKedaPATSecretKey: "token", + } +} + +func getter(env map[string]string) Getter { + return func(k string) string { return env[k] } +} + +func TestLoadFrom_Valid(t *testing.T) { + cfg, err := LoadFrom(getter(validEnv())) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.GiteaURL != "http://gitea.local" { + t.Errorf("GiteaURL = %q", cfg.GiteaURL) + } + if got, want := cfg.WhitelistedOrgs, []string{"ttd", "brg", "dsb"}; !equalSlice(got, want) { + t.Errorf("WhitelistedOrgs = %v, want %v", got, want) + } + if cfg.SyncAll { + t.Errorf("SyncAll = true, want false") + } + if cfg.GiteaPATOverride != "" { + t.Errorf("GiteaPATOverride = %q, want empty (no override in baseline env)", cfg.GiteaPATOverride) + } +} + +func TestLoadFrom_PATOverrideRelaxesKeyVaultRequirement(t *testing.T) { + env := validEnv() + delete(env, EnvKeyVaultName) + delete(env, EnvKeyVaultSecretName) + env[EnvGiteaPATOverride] = "pat-xyz" + + cfg, err := LoadFrom(getter(env)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.GiteaPATOverride != "pat-xyz" { + t.Errorf("GiteaPATOverride = %q, want pat-xyz", cfg.GiteaPATOverride) + } +} + +func TestLoadFrom_KedaPATOverrideRelaxesKVRequirement(t *testing.T) { + env := validEnv() + delete(env, EnvKedaPATKeyVaultSecretName) + env[EnvKedaPATOverride] = "keda-pat-xyz" + + cfg, err := LoadFrom(getter(env)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if cfg.KedaPATOverride != "keda-pat-xyz" { + 
t.Errorf("KedaPATOverride = %q, want keda-pat-xyz", cfg.KedaPATOverride) + } + if cfg.GiteaPATOverride != "" { + t.Errorf("GiteaPATOverride = %q, want empty (admin still goes to KV)", cfg.GiteaPATOverride) + } +} + +func TestLoadFrom_KedaPATFieldsRequired(t *testing.T) { + env := validEnv() + delete(env, EnvKedaPATSecretName) + delete(env, EnvKedaPATSecretKey) + delete(env, EnvKedaPATKeyVaultSecretName) + + _, err := LoadFrom(getter(env)) + if err == nil { + t.Fatal("expected error, got nil") + } + msg := err.Error() + for _, want := range []string{EnvKedaPATSecretName, EnvKedaPATSecretKey, EnvKedaPATKeyVaultSecretName} { + if !strings.Contains(msg, want) { + t.Errorf("error does not mention %q; got %v", want, err) + } + } +} + +func TestLoadFrom_RequiredFieldsAggregated(t *testing.T) { + _, err := LoadFrom(getter(map[string]string{})) + if err == nil { + t.Fatal("expected error, got nil") + } + msg := err.Error() + // All required fields plus the whitelist invariant should appear in one error. 
+ wantSubstrings := []string{ + EnvGiteaURL, + EnvOrgsJSONURL, + EnvOutputNamespace, + EnvSecretNamePattern, + EnvConfigMapName, + EnvKeyVaultName, + EnvKeyVaultSecretName, + "either RUNNER_ORG_SYNC_SYNC_ALL=true", + } + for _, s := range wantSubstrings { + if !strings.Contains(msg, s) { + t.Errorf("error does not mention %q\n full error: %s", s, msg) + } + } +} + +func TestLoadFrom_SecretNamePatternMustContainPlaceholder(t *testing.T) { + env := validEnv() + env[EnvSecretNamePattern] = "altinn-gitea-runner-secret" // no {org} + + _, err := LoadFrom(getter(env)) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), OrgPlaceholder) { + t.Errorf("error should mention %q placeholder; got: %v", OrgPlaceholder, err) + } +} + +func TestLoadFrom_SyncAllAcceptsEmptyWhitelist(t *testing.T) { + env := validEnv() + env[EnvSyncAll] = "true" + delete(env, EnvWhitelistedOrgs) + + cfg, err := LoadFrom(getter(env)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !cfg.SyncAll { + t.Errorf("SyncAll = false, want true") + } + if len(cfg.WhitelistedOrgs) != 0 { + t.Errorf("WhitelistedOrgs = %v, want empty", cfg.WhitelistedOrgs) + } +} + +func TestLoadFrom_RejectsEmptyWhitelistWhenSyncAllOff(t *testing.T) { + env := validEnv() + delete(env, EnvWhitelistedOrgs) + + _, err := LoadFrom(getter(env)) + if err == nil { + t.Fatal("expected error, got nil") + } +} + +func TestParseCSV(t *testing.T) { + cases := []struct { + in string + want []string + }{ + {"", nil}, + {"ttd", []string{"ttd"}}, + {"ttd,brg,dsb", []string{"ttd", "brg", "dsb"}}, + {" ttd , brg ,dsb ", []string{"ttd", "brg", "dsb"}}, + {"ttd,,brg", []string{"ttd", "brg"}}, + {",ttd,", []string{"ttd"}}, + {"ttd,brg,ttd", []string{"ttd", "brg"}}, // dedup + } + for _, c := range cases { + got := parseCSV(c.in) + if !equalSlice(got, c.want) { + t.Errorf("parseCSV(%q) = %v, want %v", c.in, got, c.want) + } + } +} + +func TestParseBool(t *testing.T) { + cases := 
map[string]bool{ + "": false, + "true": true, + "TRUE": true, + "True": true, + "1": true, + "yes": true, + "y": true, + "t": true, + "false": false, + "0": false, + "no": false, + "junk": false, + } + for in, want := range cases { + if got := parseBool(in); got != want { + t.Errorf("parseBool(%q) = %v, want %v", in, got, want) + } + } +} + +func TestSecretNameFor(t *testing.T) { + c := Config{SecretNamePattern: "altinn-gitea-runner-{org}-secret"} + if got, want := c.SecretNameFor("ttd"), "altinn-gitea-runner-ttd-secret"; got != want { + t.Errorf("SecretNameFor = %q, want %q", got, want) + } +} + +func equalSlice(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/src/runner-org-sync/internal/gitea/gitea.go b/src/runner-org-sync/internal/gitea/gitea.go new file mode 100644 index 00000000000..b143e8a0ae3 --- /dev/null +++ b/src/runner-org-sync/internal/gitea/gitea.go @@ -0,0 +1,137 @@ +// Package gitea is a minimal admin client for Gitea — just enough to mint +// per-organisation Actions runner registration tokens. +// +// The endpoint targeted is Gitea's organisation-scoped runner registration +// token API. The returned token is a one-shot string that an act_runner +// process uses to register itself with Gitea; once registered the runner +// keeps its own long-lived identity. +package gitea + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" +) + +const ( + defaultTimeout = 15 * time.Second + defaultUserAgent = "runner-org-sync" + maxErrorBody = 512 + // maxSuccessBody caps the registration-token JSON decode. The real + // response is a few hundred bytes; 16 KiB is generous defense against + // a pathological Gitea reply. + maxSuccessBody = 16 << 10 // 16 KiB +) + +// Sentinel errors. Callers can errors.Is against these to drive reconcile +// policy (e.g. 
ErrUnauthorized → fatal; ErrOrgNotFound → skip & continue). +var ( + ErrUnauthorized = errors.New("gitea: unauthorized (bad PAT)") + ErrOrgNotFound = errors.New("gitea: organisation not found") + ErrServer = errors.New("gitea: server error") +) + +// Client talks to a Gitea instance using a Personal Access Token. +type Client struct { + httpClient *http.Client + baseURL string + pat string + userAgent string +} + +// Option configures a Client. +type Option func(*Client) + +// WithHTTPClient overrides the default HTTP client. +func WithHTTPClient(h *http.Client) Option { + return func(c *Client) { c.httpClient = h } +} + +// WithUserAgent overrides the User-Agent header. +func WithUserAgent(ua string) Option { + return func(c *Client) { c.userAgent = ua } +} + +// NewClient constructs a Client. baseURL should be the Gitea instance root +// (e.g. "http://altinn-repositories-public.default.svc.cluster.local"); the +// trailing slash is normalised away. +func NewClient(baseURL, pat string, opts ...Option) *Client { + c := &Client{ + httpClient: &http.Client{Timeout: defaultTimeout}, + baseURL: strings.TrimRight(baseURL, "/"), + pat: pat, + userAgent: defaultUserAgent, + } + for _, opt := range opts { + opt(c) + } + return c +} + +// MintRegistrationToken returns a fresh runner registration token for the +// given organisation. org is the short Gitea organisation name (e.g. "ttd"). +// +// The endpoint requires HTTP POST in Gitea 1.26+ (the GET form was removed). +// Tokens themselves have no time-based expiry. However, each POST atomically +// deactivates every previously-issued token for the same org — Gitea allows +// at most one active org-scoped registration token at a time. Callers must +// therefore mint only when no usable token exists, otherwise any not-yet- +// registered runner using an older Secret value will fail to register. 
+func (c *Client) MintRegistrationToken(ctx context.Context, org string) (string, error) { + if org == "" { + return "", errors.New("gitea: org is required") + } + endpoint := fmt.Sprintf("%s/api/v1/orgs/%s/actions/runners/registration-token", + c.baseURL, url.PathEscape(org)) + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, nil) + if err != nil { + return "", fmt.Errorf("gitea: build request: %w", err) + } + req.Header.Set("Authorization", "token "+c.pat) + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", c.userAgent) + + resp, err := c.httpClient.Do(req) + if err != nil { + return "", fmt.Errorf("gitea: get registration token for %s: %w", org, err) + } + defer resp.Body.Close() + + switch { + case resp.StatusCode == http.StatusOK: + // fall through + case resp.StatusCode == http.StatusUnauthorized, resp.StatusCode == http.StatusForbidden: + return "", fmt.Errorf("%w: status %d", ErrUnauthorized, resp.StatusCode) + case resp.StatusCode == http.StatusNotFound: + return "", fmt.Errorf("%w: %s", ErrOrgNotFound, org) + case resp.StatusCode >= 500: + body := readErrorBody(resp.Body) + return "", fmt.Errorf("%w: status %d: %s", ErrServer, resp.StatusCode, body) + default: + body := readErrorBody(resp.Body) + return "", fmt.Errorf("gitea: unexpected status %d: %s", resp.StatusCode, body) + } + + var payload struct { + Token string `json:"token"` + } + if err := json.NewDecoder(io.LimitReader(resp.Body, maxSuccessBody)).Decode(&payload); err != nil { + return "", fmt.Errorf("gitea: decode response for %s: %w", org, err) + } + if payload.Token == "" { + return "", fmt.Errorf("gitea: empty token in response for %s", org) + } + return payload.Token, nil +} + +func readErrorBody(r io.Reader) string { + body, _ := io.ReadAll(io.LimitReader(r, maxErrorBody)) + return string(body) +} diff --git a/src/runner-org-sync/internal/gitea/gitea_test.go b/src/runner-org-sync/internal/gitea/gitea_test.go new file mode 100644 index 
00000000000..03f605c21c3 --- /dev/null +++ b/src/runner-org-sync/internal/gitea/gitea_test.go @@ -0,0 +1,159 @@ +package gitea + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" +) + +func newStubServer(t *testing.T, handler http.HandlerFunc) *httptest.Server { + t.Helper() + s := httptest.NewServer(handler) + t.Cleanup(s.Close) + return s +} + +func TestMintRegistrationToken_Happy(t *testing.T) { + var gotMethod, gotPath, gotAuth, gotUA string + s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotMethod = r.Method + gotPath = r.URL.Path + gotAuth = r.Header.Get("Authorization") + gotUA = r.Header.Get("User-Agent") + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"token":"reg-token-abc"}`)) + }) + + c := NewClient(s.URL+"/", "pat-xyz", WithUserAgent("ua-test")) + token, err := c.MintRegistrationToken(context.Background(), "ttd") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if token != "reg-token-abc" { + t.Errorf("token = %q, want reg-token-abc", token) + } + // Gitea 1.26+ requires POST; the legacy GET form was removed. + if gotMethod != http.MethodPost { + t.Errorf("method = %q, want POST", gotMethod) + } + if want := "/api/v1/orgs/ttd/actions/runners/registration-token"; gotPath != want { + t.Errorf("path = %q, want %q", gotPath, want) + } + if gotAuth != "token pat-xyz" { + t.Errorf("Authorization = %q, want %q", gotAuth, "token pat-xyz") + } + if gotUA != "ua-test" { + t.Errorf("User-Agent = %q, want ua-test", gotUA) + } +} + +func TestMintRegistrationToken_PathEscaped(t *testing.T) { + // Gitea org names are validated, but defence in depth: ensure path escaping + // is applied so a hostile or malformed org code cannot construct a URL. 
+ var gotPath string + s := newStubServer(t, func(w http.ResponseWriter, r *http.Request) { + gotPath = r.URL.EscapedPath() + _, _ = w.Write([]byte(`{"token":"x"}`)) + }) + c := NewClient(s.URL, "pat") + if _, err := c.MintRegistrationToken(context.Background(), "weird/org"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !strings.Contains(gotPath, "weird%2Forg") { + t.Errorf("path did not escape slash: %q", gotPath) + } +} + +func TestMintRegistrationToken_Unauthorized(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte("nope")) + }) + c := NewClient(s.URL, "bad-pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if !errors.Is(err, ErrUnauthorized) { + t.Errorf("want ErrUnauthorized, got %v", err) + } +} + +func TestMintRegistrationToken_Forbidden(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusForbidden) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if !errors.Is(err, ErrUnauthorized) { + t.Errorf("want ErrUnauthorized, got %v", err) + } +} + +func TestMintRegistrationToken_NotFound(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "missing-org") + if !errors.Is(err, ErrOrgNotFound) { + t.Errorf("want ErrOrgNotFound, got %v", err) + } +} + +func TestMintRegistrationToken_ServerError(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + _, _ = w.Write([]byte("kaboom")) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if !errors.Is(err, ErrServer) { + t.Errorf("want ErrServer, got %v", err) + } +} + +func 
TestMintRegistrationToken_EmptyToken(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{"token":""}`)) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if err == nil || !strings.Contains(err.Error(), "empty token") { + t.Errorf("want empty-token error, got %v", err) + } +} + +func TestMintRegistrationToken_MalformedJSON(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + _, _ = w.Write([]byte(`{not json`)) + }) + c := NewClient(s.URL, "pat") + _, err := c.MintRegistrationToken(context.Background(), "ttd") + if err == nil || !strings.Contains(err.Error(), "decode") { + t.Errorf("want decode error, got %v", err) + } +} + +func TestMintRegistrationToken_EmptyOrg(t *testing.T) { + c := NewClient("http://example", "pat") + _, err := c.MintRegistrationToken(context.Background(), "") + if err == nil { + t.Fatal("expected error for empty org, got nil") + } +} + +func TestMintRegistrationToken_ContextCancelled(t *testing.T) { + s := newStubServer(t, func(w http.ResponseWriter, _ *http.Request) { + time.Sleep(200 * time.Millisecond) + _, _ = w.Write([]byte(`{"token":"x"}`)) + }) + c := NewClient(s.URL, "pat") + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Millisecond) + defer cancel() + _, err := c.MintRegistrationToken(ctx, "ttd") + if err == nil { + t.Fatal("expected error from cancelled context, got nil") + } +} diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate.go b/src/runner-org-sync/internal/k8sstate/k8sstate.go new file mode 100644 index 00000000000..a37869a56b3 --- /dev/null +++ b/src/runner-org-sync/internal/k8sstate/k8sstate.go @@ -0,0 +1,270 @@ +// Package k8sstate provides the in-cluster reconcile primitives used by the +// runner-org-sync CronJob: listing the Secrets we own, creating and +// deleting per-org registration-token Secrets, and applying the runners +// ConfigMap 
idempotently. +// +// The Store is constructed around a kubernetes.Interface so the production +// path uses a real REST client while tests inject the fake clientset from +// k8s.io/client-go/kubernetes/fake. +package k8sstate + +import ( + "bytes" + "context" + "fmt" + "maps" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" +) + +// Label keys and well-known values for resources this service owns. +const ( + LabelManagedBy = "app.kubernetes.io/managed-by" + LabelComponent = "app.kubernetes.io/component" + LabelOrg = "runner-org-sync.altinn.studio/org" + LabelFluxWatch = "reconcile.fluxcd.io/watch" + + ManagedBy = "runner-org-sync" + ComponentRegToken = "runner-registration-token" + ComponentRunnerCM = "runner-org-list" + FluxWatchEnabled = "Enabled" + + // SecretTokenKey is the data key inside per-org registration Secrets, + // matching what the runner Deployment's secretKeyRef expects. + SecretTokenKey = "token" +) + +// RegistrationSecretState describes whether a per-org runner registration +// Secret is safe for the ConfigMap to reference. +type RegistrationSecretState string + +const ( + RegistrationSecretMissing RegistrationSecretState = "missing" + RegistrationSecretValid RegistrationSecretState = "valid" + RegistrationSecretInvalid RegistrationSecretState = "invalid" +) + +// Store is the package's only entry point for cluster I/O. +type Store struct { + client kubernetes.Interface + namespace string +} + +// NewStore constructs a Store bound to a single namespace. +func NewStore(client kubernetes.Interface, namespace string) *Store { + return &Store{client: client, namespace: namespace} +} + +// Namespace returns the namespace the Store operates in. Useful for logs. 
+func (s *Store) Namespace() string { return s.namespace } + +// ListManagedSecrets returns all Secrets in the namespace that this service +// owns, matched by ManagedBy + Component labels. +func (s *Store) ListManagedSecrets(ctx context.Context) ([]corev1.Secret, error) { + selector := fmt.Sprintf("%s=%s,%s=%s", + LabelManagedBy, ManagedBy, + LabelComponent, ComponentRegToken, + ) + list, err := s.client.CoreV1().Secrets(s.namespace).List(ctx, metav1.ListOptions{ + LabelSelector: selector, + }) + if err != nil { + return nil, fmt.Errorf("k8sstate: list secrets: %w", err) + } + return list.Items, nil +} + +// RegistrationSecretStatus reports whether the named Secret exists and has +// the ownership labels and token data expected for the given org. +func (s *Store) RegistrationSecretStatus(ctx context.Context, name, org string) (RegistrationSecretState, error) { + sec, err := s.client.CoreV1().Secrets(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return RegistrationSecretMissing, nil + } + if err != nil { + return "", fmt.Errorf("k8sstate: get registration secret %s: %w", name, err) + } + if sec.Type != "" && sec.Type != corev1.SecretTypeOpaque { + return RegistrationSecretInvalid, nil + } + if len(sec.Data[SecretTokenKey]) == 0 { + return RegistrationSecretInvalid, nil + } + if hasConflictingLabel(sec.Labels, LabelManagedBy, ManagedBy) || + hasConflictingLabel(sec.Labels, LabelComponent, ComponentRegToken) || + hasConflictingLabel(sec.Labels, LabelOrg, org) { + return RegistrationSecretInvalid, nil + } + if sec.Labels == nil { + sec.Labels = map[string]string{} + } + labelsChanged := ensureLabel(sec.Labels, LabelManagedBy, ManagedBy) + labelsChanged = ensureLabel(sec.Labels, LabelComponent, ComponentRegToken) || labelsChanged + labelsChanged = ensureLabel(sec.Labels, LabelOrg, org) || labelsChanged + if labelsChanged { + if _, err := s.client.CoreV1().Secrets(s.namespace).Update(ctx, sec, metav1.UpdateOptions{}); err != nil { + 
return "", fmt.Errorf("k8sstate: adopt registration secret %s: %w", name, err) + } + } + return RegistrationSecretValid, nil +} + +// CreateRegistrationSecret creates an Opaque Secret carrying the +// registration token at key "token", labelled with ManagedBy / Component / +// Org. Returns the underlying error verbatim so callers can use apierrors.IsAlreadyExists. +func (s *Store) CreateRegistrationSecret(ctx context.Context, name, org, token string) error { + sec := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: s.namespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: org, + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{ + SecretTokenKey: []byte(token), + }, + } + if _, err := s.client.CoreV1().Secrets(s.namespace).Create(ctx, sec, metav1.CreateOptions{}); err != nil { + return fmt.Errorf("k8sstate: create secret %s: %w", name, err) + } + return nil +} + +// DeleteSecret removes the named Secret. NotFound is treated as success so +// the operation is idempotent across reconciles. +func (s *Store) DeleteSecret(ctx context.Context, name string) error { + err := s.client.CoreV1().Secrets(s.namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("k8sstate: delete secret %s: %w", name, err) + } + return nil +} + +// ApplyConfigMap creates or updates the named ConfigMap so its Data matches +// the supplied value. Returns true if a write actually occurred (create or +// update), false if the existing object already matched. Labels are +// preserved on update; managed labels are added or restored if missing. 
+func (s *Store) ApplyConfigMap(ctx context.Context, name string, data map[string]string) (bool, error) { + desired := &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: s.namespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRunnerCM, + LabelFluxWatch: FluxWatchEnabled, + }, + }, + Data: data, + } + + existing, err := s.client.CoreV1().ConfigMaps(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + if _, err := s.client.CoreV1().ConfigMaps(s.namespace).Create(ctx, desired, metav1.CreateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: create configmap %s: %w", name, err) + } + return true, nil + } + if err != nil { + return false, fmt.Errorf("k8sstate: get configmap %s: %w", name, err) + } + + if existing.Labels == nil { + existing.Labels = map[string]string{} + } + labelsChanged := ensureLabel(existing.Labels, LabelManagedBy, ManagedBy) + labelsChanged = ensureLabel(existing.Labels, LabelComponent, ComponentRunnerCM) || labelsChanged + labelsChanged = ensureLabel(existing.Labels, LabelFluxWatch, FluxWatchEnabled) || labelsChanged + if maps.Equal(existing.Data, data) && !labelsChanged { + return false, nil + } + existing.Data = data + + if _, err := s.client.CoreV1().ConfigMaps(s.namespace).Update(ctx, existing, metav1.UpdateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: update configmap %s: %w", name, err) + } + return true, nil +} + +// ApplyOpaqueSecret creates or updates a single-key Opaque Secret so its +// data[key] equals value. Returns true if a write occurred. Used for the +// KEDA PAT projection: the value originates from Key Vault, the Secret is +// consumed by KEDA's TriggerAuthentication. +// +// Labels are applied on create (ManagedBy). On update, the managed-by label +// is added or restored if missing; other existing labels are preserved. 
+func (s *Store) ApplyOpaqueSecret(ctx context.Context, name, key, value string) (bool, error) { + if key == "" { + return false, fmt.Errorf("k8sstate: ApplyOpaqueSecret %s: key is required", name) + } + encoded := []byte(value) + + existing, err := s.client.CoreV1().Secrets(s.namespace).Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + desired := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: s.namespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{key: encoded}, + } + if _, err := s.client.CoreV1().Secrets(s.namespace).Create(ctx, desired, metav1.CreateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: create opaque secret %s: %w", name, err) + } + return true, nil + } + if err != nil { + return false, fmt.Errorf("k8sstate: get opaque secret %s: %w", name, err) + } + + if existing.Labels == nil { + existing.Labels = map[string]string{} + } + labelsChanged := ensureLabel(existing.Labels, LabelManagedBy, ManagedBy) + + // Only writing the single key we manage; leave any other keys untouched. + if bytes.Equal(existing.Data[key], encoded) && !labelsChanged { + return false, nil + } + + if existing.Data == nil { + existing.Data = map[string][]byte{} + } + existing.Data[key] = encoded + + if _, err := s.client.CoreV1().Secrets(s.namespace).Update(ctx, existing, metav1.UpdateOptions{}); err != nil { + return false, fmt.Errorf("k8sstate: update opaque secret %s: %w", name, err) + } + return true, nil +} + +// OrgFromSecret extracts the org code from a managed Secret's label. Returns +// the empty string if the label is missing — callers should treat that as +// a foreign Secret and skip it. 
+func OrgFromSecret(s corev1.Secret) string { + return s.Labels[LabelOrg] +} + +func ensureLabel(labels map[string]string, key, value string) bool { + if labels[key] == value { + return false + } + labels[key] = value + return true +} + +func hasConflictingLabel(labels map[string]string, key, expected string) bool { + value, ok := labels[key] + return ok && value != expected +} diff --git a/src/runner-org-sync/internal/k8sstate/k8sstate_test.go b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go new file mode 100644 index 00000000000..fd2565af435 --- /dev/null +++ b/src/runner-org-sync/internal/k8sstate/k8sstate_test.go @@ -0,0 +1,459 @@ +package k8sstate + +import ( + "context" + "sort" + "testing" + + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +const testNamespace = "studio-runners" + +func TestCreateRegistrationSecret_SetsLabelsAndData(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + + if err := s.CreateRegistrationSecret(context.Background(), "altinn-gitea-runner-ttd-secret", "ttd", "tok-1"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + got, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "altinn-gitea-runner-ttd-secret", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get failed: %v", err) + } + if got.Type != corev1.SecretTypeOpaque { + t.Errorf("type = %v, want Opaque", got.Type) + } + if string(got.Data[SecretTokenKey]) != "tok-1" { + t.Errorf("data[%s] = %q, want %q", SecretTokenKey, string(got.Data[SecretTokenKey]), "tok-1") + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by = %q, want %q", got.Labels[LabelManagedBy], ManagedBy) + } + if got.Labels[LabelComponent] != ComponentRegToken { + t.Errorf("component = %q, want %q", got.Labels[LabelComponent], ComponentRegToken) + } + if got.Labels[LabelOrg] != "ttd" { + t.Errorf("org label = 
%q, want ttd", got.Labels[LabelOrg]) + } +} + +func TestCreateRegistrationSecret_AlreadyExists(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "x", Namespace: testNamespace}, + }) + s := NewStore(c, testNamespace) + err := s.CreateRegistrationSecret(context.Background(), "x", "ttd", "tok") + if err == nil { + t.Fatal("expected error for duplicate, got nil") + } +} + +func TestRegistrationSecretStatus(t *testing.T) { + c := fake.NewSimpleClientset( + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "valid", + Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "ttd", + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: []byte("tok")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "foreign", + Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: "someone-else"}, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: []byte("tok")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "legacy-unlabeled", + Namespace: testNamespace, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: []byte("tok")}, + }, + &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "empty-token", + Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "ttd", + }, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{SecretTokenKey: nil}, + }, + ) + s := NewStore(c, testNamespace) + + tests := []struct { + name string + secretName string + org string + want RegistrationSecretState + }{ + {name: "valid", secretName: "valid", org: "ttd", want: RegistrationSecretValid}, + {name: "missing", secretName: "missing", org: "ttd", want: RegistrationSecretMissing}, + {name: "foreign same name", secretName: "foreign", org: "ttd", want: 
RegistrationSecretInvalid}, + {name: "legacy unlabeled", secretName: "legacy-unlabeled", org: "ttd", want: RegistrationSecretValid}, + {name: "wrong org", secretName: "valid", org: "brg", want: RegistrationSecretInvalid}, + {name: "empty token", secretName: "empty-token", org: "ttd", want: RegistrationSecretInvalid}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := s.RegistrationSecretStatus(context.Background(), tt.secretName, tt.org) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != tt.want { + t.Errorf("RegistrationSecretStatus() = %q, want %q", got, tt.want) + } + }) + } + + got, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "legacy-unlabeled", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get adopted legacy secret: %v", err) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("legacy managed-by label = %q, want %q", got.Labels[LabelManagedBy], ManagedBy) + } + if got.Labels[LabelComponent] != ComponentRegToken { + t.Errorf("legacy component label = %q, want %q", got.Labels[LabelComponent], ComponentRegToken) + } + if got.Labels[LabelOrg] != "ttd" { + t.Errorf("legacy org label = %q, want ttd", got.Labels[LabelOrg]) + } +} + +func TestDeleteSecret_IdempotentOnMissing(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + if err := s.DeleteSecret(context.Background(), "never-existed"); err != nil { + t.Errorf("delete missing should be nil, got %v", err) + } +} + +func TestDeleteSecret_RemovesExisting(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "x", Namespace: testNamespace}, + }) + s := NewStore(c, testNamespace) + if err := s.DeleteSecret(context.Background(), "x"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + _, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "x", metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + t.Errorf("get deleted 
secret error = %v, want NotFound", err) + } +} + +func TestListManagedSecrets_OnlyOurs(t *testing.T) { + managed1 := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ours-ttd", Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "ttd", + }, + }, + } + managed2 := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ours-brg", Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRegToken, + LabelOrg: "brg", + }, + }, + } + foreign := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "stranger", Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: "someone-else"}, + }, + } + c := fake.NewSimpleClientset(managed1, managed2, foreign) + s := NewStore(c, testNamespace) + + got, err := s.ListManagedSecrets(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(got) != 2 { + t.Fatalf("got %d secrets, want 2", len(got)) + } + names := []string{got[0].Name, got[1].Name} + sort.Strings(names) + if names[0] != "ours-brg" || names[1] != "ours-ttd" { + t.Errorf("got secrets %v, want [ours-brg ours-ttd]", names) + } +} + +func TestApplyConfigMap_CreatesWhenMissing(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "runner-org-list", map[string]string{"runners.yaml": "- name: ttd\n"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (create)") + } + cm, err := c.CoreV1().ConfigMaps(testNamespace).Get(context.Background(), "runner-org-list", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get: %v", err) + } + if cm.Data["runners.yaml"] != "- name: ttd\n" { + t.Errorf("data wrong: %v", cm.Data) + } + if cm.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label missing, got %v", cm.Labels) + } + if 
cm.Labels[LabelFluxWatch] != FluxWatchEnabled { + t.Errorf("flux watch label missing, got %v", cm.Labels) + } +} + +func TestApplyConfigMap_NoOpOnSameContent(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cm", Namespace: testNamespace, + Labels: map[string]string{ + LabelManagedBy: ManagedBy, + LabelComponent: ComponentRunnerCM, + LabelFluxWatch: FluxWatchEnabled, + }, + }, + Data: map[string]string{"k": "v"}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "cm", map[string]string{"k": "v"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if changed { + t.Error("changed = true, want false (no diff)") + } +} + +func TestApplyConfigMap_UpdatesOnLabelDrift(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cm", + Namespace: testNamespace, + Labels: map[string]string{"custom": "keep"}, + }, + Data: map[string]string{"k": "v"}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "cm", map[string]string{"k": "v"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (label drift)") + } + got, _ := c.CoreV1().ConfigMaps(testNamespace).Get(context.Background(), "cm", metav1.GetOptions{}) + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not restored, got %v", got.Labels) + } + if got.Labels[LabelComponent] != ComponentRunnerCM { + t.Errorf("component label was not restored, got %v", got.Labels) + } + if got.Labels[LabelFluxWatch] != FluxWatchEnabled { + t.Errorf("flux watch label was not restored, got %v", got.Labels) + } + if got.Labels["custom"] != "keep" { + t.Errorf("custom label was not preserved, got %v", got.Labels) + } +} + +func TestApplyConfigMap_UpdatesOnDifference(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.ConfigMap{ + ObjectMeta: 
metav1.ObjectMeta{Name: "cm", Namespace: testNamespace}, + Data: map[string]string{"k": "old"}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyConfigMap(context.Background(), "cm", map[string]string{"k": "new"}) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (update)") + } + got, _ := c.CoreV1().ConfigMaps(testNamespace).Get(context.Background(), "cm", metav1.GetOptions{}) + if got.Data["k"] != "new" { + t.Errorf("data not updated: %v", got.Data) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not added on update, got %v", got.Labels) + } +} + +func TestOrgFromSecret(t *testing.T) { + s := corev1.Secret{ObjectMeta: metav1.ObjectMeta{Labels: map[string]string{LabelOrg: "ttd"}}} + if got := OrgFromSecret(s); got != "ttd" { + t.Errorf("OrgFromSecret = %q, want ttd", got) + } + if got := OrgFromSecret(corev1.Secret{}); got != "" { + t.Errorf("OrgFromSecret on unlabelled secret = %q, want empty", got) + } +} + +func TestApplyOpaqueSecret_CreatesWhenMissing(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + + changed, err := s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "pat-value") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (create)") + } + got, err := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "keda-gitea-pat", metav1.GetOptions{}) + if err != nil { + t.Fatalf("get: %v", err) + } + if got.Type != corev1.SecretTypeOpaque { + t.Errorf("type = %v, want Opaque", got.Type) + } + if string(got.Data["token"]) != "pat-value" { + t.Errorf("data[token] = %q, want pat-value", string(got.Data["token"])) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by = %q, want %q", got.Labels[LabelManagedBy], ManagedBy) + } +} + +func TestApplyOpaqueSecret_NoOpOnSameValue(t *testing.T) { + c := 
fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "keda-gitea-pat", Namespace: testNamespace, + Labels: map[string]string{LabelManagedBy: ManagedBy}, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"token": []byte("pat-value")}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "pat-value") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if changed { + t.Error("changed = true, want false (no diff)") + } +} + +func TestApplyOpaqueSecret_UpdatesOnLabelDrift(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "keda-gitea-pat", + Namespace: testNamespace, + Labels: map[string]string{"custom": "keep"}, + }, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"token": []byte("pat-value")}, + }) + s := NewStore(c, testNamespace) + + changed, err := s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "pat-value") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (label drift)") + } + got, _ := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "keda-gitea-pat", metav1.GetOptions{}) + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not restored, got %v", got.Labels) + } + if got.Labels["custom"] != "keep" { + t.Errorf("custom label was not preserved, got %v", got.Labels) + } + if string(got.Data["token"]) != "pat-value" { + t.Errorf("token = %q, want pat-value", string(got.Data["token"])) + } +} + +func TestApplyOpaqueSecret_UpdatesOnDifference(t *testing.T) { + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "keda-gitea-pat", Namespace: testNamespace}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{"token": []byte("old-pat")}, + }) + s := NewStore(c, testNamespace) + + changed, err := 
s.ApplyOpaqueSecret(context.Background(), "keda-gitea-pat", "token", "new-pat") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if !changed { + t.Error("changed = false, want true (update)") + } + got, _ := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "keda-gitea-pat", metav1.GetOptions{}) + if string(got.Data["token"]) != "new-pat" { + t.Errorf("data[token] = %q, want new-pat", string(got.Data["token"])) + } + if got.Labels[LabelManagedBy] != ManagedBy { + t.Errorf("managed-by label was not added on update, got %v", got.Labels) + } +} + +func TestApplyOpaqueSecret_PreservesOtherKeys(t *testing.T) { + // Some other actor wrote an unrelated key into the Secret; we must not + // stomp on it when applying ours. This is defence in depth against an + // operator that manages multiple keys in one Opaque Secret. + c := fake.NewSimpleClientset(&corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "shared", Namespace: testNamespace}, + Type: corev1.SecretTypeOpaque, + Data: map[string][]byte{ + "token": []byte("old-pat"), + "other": []byte("not-ours"), + }, + }) + s := NewStore(c, testNamespace) + + if _, err := s.ApplyOpaqueSecret(context.Background(), "shared", "token", "new-pat"); err != nil { + t.Fatalf("unexpected error: %v", err) + } + got, _ := c.CoreV1().Secrets(testNamespace).Get(context.Background(), "shared", metav1.GetOptions{}) + if string(got.Data["other"]) != "not-ours" { + t.Errorf("other key was overwritten: %q", string(got.Data["other"])) + } + if string(got.Data["token"]) != "new-pat" { + t.Errorf("token = %q, want new-pat", string(got.Data["token"])) + } +} + +func TestApplyOpaqueSecret_RejectsEmptyKey(t *testing.T) { + c := fake.NewSimpleClientset() + s := NewStore(c, testNamespace) + if _, err := s.ApplyOpaqueSecret(context.Background(), "x", "", "v"); err == nil { + t.Fatal("expected error for empty key, got nil") + } +} diff --git a/src/runner-org-sync/internal/keyvault/keyvault.go 
b/src/runner-org-sync/internal/keyvault/keyvault.go new file mode 100644 index 00000000000..1935ae989a6 --- /dev/null +++ b/src/runner-org-sync/internal/keyvault/keyvault.go @@ -0,0 +1,113 @@ +// Package keyvault resolves the Gitea Personal Access Token. +// +// Two sources are supported: +// +// - an env-var override (for local development; sidesteps Azure auth entirely), +// - Azure Key Vault, accessed via DefaultAzureCredential, which in-cluster +// resolves to the Workload Identity federated token automatically. +// +// The Loader returns the resolved PAT along with a Source label so callers +// can log where the value came from (an accidental env-var fallback in +// production is then immediately visible). +package keyvault + +import ( + "context" + "errors" + "fmt" + + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" + "github.com/Azure/azure-sdk-for-go/sdk/security/keyvault/azsecrets" +) + +// Source describes where a PAT came from. +type Source string + +const ( + SourceEnv Source = "env" + SourceKeyVault Source = "keyvault" +) + +// ErrNoSource is returned when the loader has neither an env override nor a +// configured Key Vault Getter. +var ErrNoSource = errors.New("keyvault: no env override and no Key Vault getter configured") + +// Getter abstracts secret retrieval. The production implementation wraps +// the Azure SDK; tests inject a stub. +type Getter interface { + GetSecret(ctx context.Context, secretName string) (string, error) +} + +// Loader resolves a PAT, preferring the env override over the Key Vault +// path. Construct with NewLoader; the zero value is not usable. +type Loader struct { + envOverride string + getter Getter + secretName string +} + +// NewLoader creates a Loader. Pass an empty envOverride to disable the +// override and force the Key Vault path. Pass a nil getter when only the +// env path is configured (Load will return ErrNoSource if it has to fall +// back to Key Vault). 
+func NewLoader(envOverride string, getter Getter, secretName string) *Loader { + return &Loader{ + envOverride: envOverride, + getter: getter, + secretName: secretName, + } +} + +// Load resolves the PAT. It returns the value, the source it came from, and +// any error. Source is meaningful only when err is nil. +func (l *Loader) Load(ctx context.Context) (string, Source, error) { + if l.envOverride != "" { + return l.envOverride, SourceEnv, nil + } + if l.getter == nil { + return "", "", ErrNoSource + } + v, err := l.getter.GetSecret(ctx, l.secretName) + if err != nil { + return "", "", err + } + if v == "" { + return "", "", fmt.Errorf("keyvault: secret %q has empty value", l.secretName) + } + return v, SourceKeyVault, nil +} + +// NewAzureGetter constructs a production Getter backed by Azure Key Vault. +// It uses DefaultAzureCredential, which inside an AKS pod with the Workload +// Identity webhook prefers the federated token. vaultName is the short name +// (e.g. "studio-kv"), not the full URL. 
+func NewAzureGetter(vaultName string) (Getter, error) { + if vaultName == "" { + return nil, errors.New("keyvault: vaultName is required") + } + cred, err := azidentity.NewDefaultAzureCredential(nil) + if err != nil { + return nil, fmt.Errorf("keyvault: build credential: %w", err) + } + vaultURL := fmt.Sprintf("https://%s.vault.azure.net", vaultName) + client, err := azsecrets.NewClient(vaultURL, cred, nil) + if err != nil { + return nil, fmt.Errorf("keyvault: build secrets client: %w", err) + } + return &azureGetter{client: client}, nil +} + +type azureGetter struct { + client *azsecrets.Client +} + +func (g *azureGetter) GetSecret(ctx context.Context, name string) (string, error) { + resp, err := g.client.GetSecret(ctx, name, "", nil) + if err != nil { + return "", fmt.Errorf("keyvault: GetSecret %q: %w", name, err) + } + if resp.Value == nil { + return "", nil + } + return *resp.Value, nil +} diff --git a/src/runner-org-sync/internal/keyvault/keyvault_test.go b/src/runner-org-sync/internal/keyvault/keyvault_test.go new file mode 100644 index 00000000000..eeed4951bce --- /dev/null +++ b/src/runner-org-sync/internal/keyvault/keyvault_test.go @@ -0,0 +1,99 @@ +package keyvault + +import ( + "context" + "errors" + "strings" + "testing" +) + +// stubGetter records how it was called and returns a canned response. 
+type stubGetter struct { + value string + err error + gotCtx context.Context + gotSecret string + calls int +} + +func (s *stubGetter) GetSecret(ctx context.Context, name string) (string, error) { + s.calls++ + s.gotCtx = ctx + s.gotSecret = name + return s.value, s.err +} + +func TestLoad_EnvOverridePrefersOverGetter(t *testing.T) { + getter := &stubGetter{value: "from-kv"} + l := NewLoader("override-pat", getter, "kv-secret-name") + + val, src, err := l.Load(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if val != "override-pat" { + t.Errorf("value = %q, want override-pat", val) + } + if src != SourceEnv { + t.Errorf("source = %q, want %q", src, SourceEnv) + } + if getter.calls != 0 { + t.Errorf("getter should not be called when env override is set; got %d calls", getter.calls) + } +} + +func TestLoad_KeyVaultPath(t *testing.T) { + getter := &stubGetter{value: "from-kv"} + l := NewLoader("", getter, "gitea-admin-pat") + + val, src, err := l.Load(context.Background()) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if val != "from-kv" { + t.Errorf("value = %q, want from-kv", val) + } + if src != SourceKeyVault { + t.Errorf("source = %q, want %q", src, SourceKeyVault) + } + if getter.gotSecret != "gitea-admin-pat" { + t.Errorf("getter called with secret %q, want gitea-admin-pat", getter.gotSecret) + } +} + +func TestLoad_NoOverrideNoGetter(t *testing.T) { + l := NewLoader("", nil, "name") + _, _, err := l.Load(context.Background()) + if !errors.Is(err, ErrNoSource) { + t.Errorf("want ErrNoSource, got %v", err) + } +} + +func TestLoad_GetterError(t *testing.T) { + wantErr := errors.New("kv down") + getter := &stubGetter{err: wantErr} + l := NewLoader("", getter, "name") + _, _, err := l.Load(context.Background()) + if !errors.Is(err, wantErr) { + t.Errorf("expected wrapped error, got %v", err) + } +} + +func TestLoad_EmptyValueFromKeyVault(t *testing.T) { + getter := &stubGetter{value: ""} + l := NewLoader("", 
getter, "name") + _, _, err := l.Load(context.Background()) + if err == nil { + t.Fatal("expected error, got nil") + } + if !strings.Contains(err.Error(), "empty value") { + t.Errorf("error should mention empty value; got %v", err) + } +} + +func TestNewAzureGetter_RejectsEmptyVaultName(t *testing.T) { + _, err := NewAzureGetter("") + if err == nil { + t.Fatal("expected error for empty vault name, got nil") + } +} diff --git a/src/runner-org-sync/internal/reconcile/reconcile.go b/src/runner-org-sync/internal/reconcile/reconcile.go new file mode 100644 index 00000000000..86dc2798def --- /dev/null +++ b/src/runner-org-sync/internal/reconcile/reconcile.go @@ -0,0 +1,331 @@ +// Package reconcile implements the pure orchestration loop of +// runner-org-sync: fetch the org list, filter it, diff against the cluster, +// mint missing tokens, delete unwanted Secrets, and project the runners +// ConfigMap. +// +// The Reconciler depends on small interfaces and never imports OpenTelemetry +// or slog — observability is the caller's responsibility, driven by the +// Report returned from Run. This keeps unit tests free of any global setup. +package reconcile + +import ( + "context" + "errors" + "fmt" + "sort" + + "altinn.studio/runner-org-sync/internal/cdn" + "altinn.studio/runner-org-sync/internal/gitea" + "altinn.studio/runner-org-sync/internal/k8sstate" + corev1 "k8s.io/api/core/v1" + "sigs.k8s.io/yaml" +) + +// Defaults used when the caller does not override. +const ( + ConfigMapDataKey = "runners.yaml" + FilterReasonNoEnv = "no_environments" + FilterReasonWhitelist = "not_in_whitelist" +) + +// Failure stages, surfaced on Report.FailedOrgs[*].Stage. +const ( + StageValidate = "validate" + StageMint = "mint" + StageCreate = "create" + StageDelete = "delete" +) + +// OrgSource produces the discovered org population (typically the CDN client). 
+type OrgSource interface { + Fetch(ctx context.Context) ([]cdn.Org, error) +} + +// TokenMinter produces a fresh registration token for an org. The +// implementation must be safe to call concurrently with itself; the +// Reconciler currently calls it serially, but that may change. +type TokenMinter interface { + MintRegistrationToken(ctx context.Context, org string) (string, error) +} + +// SecretStore is the cluster I/O surface the Reconciler needs. +type SecretStore interface { + ListManagedSecrets(ctx context.Context) ([]corev1.Secret, error) + RegistrationSecretStatus(ctx context.Context, name, org string) (k8sstate.RegistrationSecretState, error) + CreateRegistrationSecret(ctx context.Context, name, org, token string) error + DeleteSecret(ctx context.Context, name string) error + ApplyConfigMap(ctx context.Context, name string, data map[string]string) (bool, error) +} + +// Outcome summarises how a run ended. +type Outcome string + +const ( + OutcomeSuccess Outcome = "success" + OutcomePartial Outcome = "partial" + OutcomeFailure Outcome = "failure" +) + +// OrgFailure records a single per-org error captured during reconciliation. +// It does not abort the run; the org is simply omitted from this tick's +// ConfigMap so the chart never references a Secret that does not exist. +type OrgFailure struct { + Org string + Stage string + Err error +} + +// Report is the structured result of a single Run. The caller derives all +// telemetry (logs, metrics, span events) from this value. +type Report struct { + Outcome Outcome + Discovered int + FilteredNoEnv []string + FilteredWhitelist []string + Desired []string + SecretsCreated []string + SecretsDeleted []string + SecretsSkipped []string + FailedOrgs []OrgFailure + ConfigMapChanged bool +} + +// Reconciler ties the dependencies together. Construct with New. 
+type Reconciler struct { + source OrgSource + minter TokenMinter + store SecretStore + secretNameFor func(org string) string + configMapName string + whitelist map[string]struct{} + syncAll bool +} + +// Options configure a Reconciler. The zero value is invalid; all four +// dependency fields are required by New. +type Options struct { + Source OrgSource + Minter TokenMinter + Store SecretStore + SecretNameFor func(org string) string + ConfigMapName string + Whitelist []string // empty + SyncAll=false → error at construction + SyncAll bool +} + +// New constructs a Reconciler from validated Options. +func New(opts Options) (*Reconciler, error) { + switch { + case opts.Source == nil: + return nil, errors.New("reconcile: Source is required") + case opts.Minter == nil: + return nil, errors.New("reconcile: Minter is required") + case opts.Store == nil: + return nil, errors.New("reconcile: Store is required") + case opts.SecretNameFor == nil: + return nil, errors.New("reconcile: SecretNameFor is required") + case opts.ConfigMapName == "": + return nil, errors.New("reconcile: ConfigMapName is required") + case !opts.SyncAll && len(opts.Whitelist) == 0: + return nil, errors.New("reconcile: either SyncAll=true or a non-empty Whitelist is required") + } + wl := make(map[string]struct{}, len(opts.Whitelist)) + for _, w := range opts.Whitelist { + wl[w] = struct{}{} + } + return &Reconciler{ + source: opts.Source, + minter: opts.Minter, + store: opts.Store, + secretNameFor: opts.SecretNameFor, + configMapName: opts.ConfigMapName, + whitelist: wl, + syncAll: opts.SyncAll, + }, nil +} + +// Run executes one full reconciliation cycle. It returns a non-nil error +// only for fatal failures (CDN unreachable, listing Secrets fails, applying +// the ConfigMap fails). Per-org failures are captured in Report.FailedOrgs; +// the function still returns nil error and Outcome=Partial so the CronJob +// exits zero and the next tick retries. 
+func (r *Reconciler) Run(ctx context.Context) (Report, error) { + report := Report{Outcome: OutcomeFailure} + + orgs, err := r.source.Fetch(ctx) + if err != nil { + return report, fmt.Errorf("reconcile: fetch orgs: %w", err) + } + report.Discovered = len(orgs) + + desired := r.filter(orgs, &report) + report.Desired = orgCodes(desired) + sort.Strings(report.Desired) + + existing, err := r.store.ListManagedSecrets(ctx) + if err != nil { + return report, fmt.Errorf("reconcile: list managed secrets: %w", err) + } + + // For each desired org, ensure its Secret exists. Per-org failures are + // recorded but do not abort the run. + orgHasSecret := make(map[string]bool, len(desired)) + for _, org := range desired { + name := r.secretNameFor(org.Code) + status, err := r.store.RegistrationSecretStatus(ctx, name, org.Code) + if err != nil { + // The lookup hitting a transient apiserver error is fatal for + // this run — without this lookup we cannot decide mint-or-skip. + return report, fmt.Errorf("reconcile: check registration secret %s: %w", name, err) + } + switch status { + case k8sstate.RegistrationSecretValid: + report.SecretsSkipped = append(report.SecretsSkipped, org.Code) + orgHasSecret[org.Code] = true + continue + case k8sstate.RegistrationSecretInvalid: + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{ + Org: org.Code, + Stage: StageValidate, + Err: fmt.Errorf("registration secret %s exists but is not a valid runner token secret", name), + }) + continue + } + token, err := r.minter.MintRegistrationToken(ctx, org.Code) + if err != nil { + // Auth failures hit every subsequent org with the same PAT — + // fail fast instead of cascading the same root cause across + // the whole desired set. K8s records the CronJob failure and + // the next tick retries with whatever the latest PAT in KV is. 
+ if errors.Is(err, gitea.ErrUnauthorized) { + return report, fmt.Errorf("reconcile: mint token for %s: %w", org.Code, err) + } + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org.Code, Stage: StageMint, Err: err}) + continue + } + if err := r.store.CreateRegistrationSecret(ctx, name, org.Code, token); err != nil { + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org.Code, Stage: StageCreate, Err: err}) + continue + } + report.SecretsCreated = append(report.SecretsCreated, org.Code) + orgHasSecret[org.Code] = true + } + + // Delete Secrets we own whose org is no longer desired. + desiredSet := make(map[string]struct{}, len(desired)) + for _, o := range desired { + desiredSet[o.Code] = struct{}{} + } + for _, sec := range existing { + org := k8sstate.OrgFromSecret(sec) + if org == "" { + // Defence in depth: a managed Secret missing the org label is a + // drift signal; skip rather than delete on uncertain attribution. + continue + } + if _, keep := desiredSet[org]; keep { + continue + } + if err := r.store.DeleteSecret(ctx, sec.Name); err != nil { + report.FailedOrgs = append(report.FailedOrgs, OrgFailure{Org: org, Stage: StageDelete, Err: err}) + continue + } + report.SecretsDeleted = append(report.SecretsDeleted, org) + } + + // Project the ConfigMap from orgs whose Secret currently exists. This is + // what guarantees the chart never points at a missing Secret: if a mint + // failed earlier this run, the org silently drops out this tick. 
+ projected := make([]string, 0, len(desired)) + for _, o := range desired { + if orgHasSecret[o.Code] { + projected = append(projected, o.Code) + } + } + sort.Strings(projected) + sort.Strings(report.SecretsCreated) + sort.Strings(report.SecretsDeleted) + sort.Strings(report.SecretsSkipped) + + data := map[string]string{ + ConfigMapDataKey: renderRunners(projected, r.secretNameFor), + } + changed, err := r.store.ApplyConfigMap(ctx, r.configMapName, data) + if err != nil { + return report, fmt.Errorf("reconcile: apply configmap %s: %w", r.configMapName, err) + } + report.ConfigMapChanged = changed + + if len(report.FailedOrgs) > 0 { + report.Outcome = OutcomePartial + } else { + report.Outcome = OutcomeSuccess + } + return report, nil +} + +// filter applies the environments-non-empty and whitelist filters, +// recording filtered-out org codes in the report for visibility. +func (r *Reconciler) filter(orgs []cdn.Org, report *Report) []cdn.Org { + out := make([]cdn.Org, 0, len(orgs)) + for _, o := range orgs { + if len(o.Environments) == 0 { + report.FilteredNoEnv = append(report.FilteredNoEnv, o.Code) + continue + } + if !r.syncAll { + if _, ok := r.whitelist[o.Code]; !ok { + report.FilteredWhitelist = append(report.FilteredWhitelist, o.Code) + continue + } + } + out = append(out, o) + } + sort.Strings(report.FilteredNoEnv) + sort.Strings(report.FilteredWhitelist) + sort.Slice(out, func(i, j int) bool { return out[i].Code < out[j].Code }) + return out +} + +// renderRunners emits Helm values consumed by the gitea-org-runner-config +// HelmRelease via Flux valuesFrom. Determinism via sorted input is required +// so unchanged state produces unchanged output and ApplyConfigMap detects +// "no change" correctly. +// +// Replica count is deliberately omitted: scaling is owned by KEDA ScaledJobs +// on the consumer side, so a runner-org-sync-supplied replicas field would +// be ignored at best and misleading at worst. 
+func renderRunners(orgs []string, secretNameFor func(org string) string) string { + runners := make([]runnerConfig, 0, len(orgs)) + for _, org := range orgs { + runners = append(runners, runnerConfig{ + Name: org, + RegistrationTokenSecretName: secretNameFor(org), + }) + } + out, err := yaml.Marshal(runnerValues{Runners: runners}) + if err != nil { + // The input is a simple slice of strings rendered into a static struct; + // yaml.Marshal should not fail. Keep an empty runner list if it ever + // does, so the chart does not reference stale runners. + return "runners: []\n" + } + return string(out) +} + +type runnerValues struct { + Runners []runnerConfig `json:"runners"` +} + +type runnerConfig struct { + Name string `json:"name"` + RegistrationTokenSecretName string `json:"registrationTokenSecretName"` +} + +func orgCodes(orgs []cdn.Org) []string { + out := make([]string, 0, len(orgs)) + for _, o := range orgs { + out = append(out, o.Code) + } + return out +} diff --git a/src/runner-org-sync/internal/reconcile/reconcile_test.go b/src/runner-org-sync/internal/reconcile/reconcile_test.go new file mode 100644 index 00000000000..f3192242be6 --- /dev/null +++ b/src/runner-org-sync/internal/reconcile/reconcile_test.go @@ -0,0 +1,542 @@ +package reconcile + +import ( + "context" + "errors" + "fmt" + "sort" + "strings" + "testing" + + "altinn.studio/runner-org-sync/internal/cdn" + "altinn.studio/runner-org-sync/internal/gitea" + "altinn.studio/runner-org-sync/internal/k8sstate" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// --- stub dependencies ------------------------------------------------------ + +type stubSource struct { + orgs []cdn.Org + err error +} + +func (s *stubSource) Fetch(_ context.Context) ([]cdn.Org, error) { return s.orgs, s.err } + +type stubMinter struct { + // per-org overrides: token to return or error to raise. 
+ tokens map[string]string + errs map[string]error + calls []string +} + +func (m *stubMinter) MintRegistrationToken(_ context.Context, org string) (string, error) { + m.calls = append(m.calls, org) + if err, ok := m.errs[org]; ok { + return "", err + } + if t, ok := m.tokens[org]; ok { + return t, nil + } + return "tok-" + org, nil +} + +type stubStore struct { + managed []corev1.Secret + statusByName map[string]k8sstate.RegistrationSecretState + createErr map[string]error + deleteErr map[string]error + applyCMErr error + listErr error + existsErr error + createdSecrets []string + createdOrgs map[string]string + deletedSecrets []string + appliedCMData map[string]string + appliedCMChange bool +} + +func newStubStore() *stubStore { + return &stubStore{ + statusByName: map[string]k8sstate.RegistrationSecretState{}, + createErr: map[string]error{}, + deleteErr: map[string]error{}, + createdOrgs: map[string]string{}, + appliedCMChange: true, + } +} + +func (s *stubStore) ListManagedSecrets(_ context.Context) ([]corev1.Secret, error) { + return s.managed, s.listErr +} + +func (s *stubStore) RegistrationSecretStatus(_ context.Context, name, _ string) (k8sstate.RegistrationSecretState, error) { + if s.existsErr != nil { + return "", s.existsErr + } + if status, ok := s.statusByName[name]; ok { + return status, nil + } + return k8sstate.RegistrationSecretMissing, nil +} + +func (s *stubStore) CreateRegistrationSecret(_ context.Context, name, org, _ string) error { + if err, ok := s.createErr[name]; ok { + return err + } + s.createdSecrets = append(s.createdSecrets, name) + s.createdOrgs[name] = org + s.statusByName[name] = k8sstate.RegistrationSecretValid + return nil +} + +func (s *stubStore) DeleteSecret(_ context.Context, name string) error { + if err, ok := s.deleteErr[name]; ok { + return err + } + s.deletedSecrets = append(s.deletedSecrets, name) + return nil +} + +func (s *stubStore) ApplyConfigMap(_ context.Context, _ string, data map[string]string) (bool, error) { 
// secretNameFor maps an org code to the Secret name used throughout these
// tests: "altinn-gitea-runner-<org>-secret".
func secretNameFor(org string) string {
	const (
		prefix = "altinn-gitea-runner-"
		suffix = "-secret"
	)
	return prefix + org + suffix
}
+func TestRun_ColdStart(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02", "production"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "dsb", Environments: []string{"tt02"}}, + }} + minter := &stubMinter{} + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg", "dsb"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + wantCreated := []string{"brg", "dsb", "ttd"} + if !equalSlice(rep.SecretsCreated, wantCreated) { + t.Errorf("SecretsCreated = %v, want %v", rep.SecretsCreated, wantCreated) + } + if !equalSlice(minter.calls, wantCreated) { + t.Errorf("minter calls = %v, want %v (sorted)", minter.calls, wantCreated) + } + if !rep.ConfigMapChanged { + t.Errorf("ConfigMapChanged = false, want true on cold start") + } + wantBody := strings.Join([]string{ + "runners:", + "- name: brg", + " registrationTokenSecretName: altinn-gitea-runner-brg-secret", + "- name: dsb", + " registrationTokenSecretName: altinn-gitea-runner-dsb-secret", + "- name: ttd", + " registrationTokenSecretName: altinn-gitea-runner-ttd-secret", + "", + }, "\n") + if got := store.appliedCMData[ConfigMapDataKey]; got != wantBody { + t.Errorf("ConfigMap body =\n%q\nwant\n%q", got, wantBody) + } +} + +// Scenario 2: re-run with no upstream change → no Secret writes, no mint calls. +func TestRun_IdempotentReRun(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + }} + minter := &stubMinter{} + store := newStubStore() + // pre-populate existing state — secrets exist for both orgs and we own them. 
+ store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.statusByName["altinn-gitea-runner-brg-secret"] = k8sstate.RegistrationSecretValid + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + managedSecret("altinn-gitea-runner-brg-secret", "brg"), + } + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if len(minter.calls) != 0 { + t.Errorf("minter should not be called on idempotent re-run; got %v", minter.calls) + } + if len(store.createdSecrets) != 0 { + t.Errorf("no creates expected; got %v", store.createdSecrets) + } + if len(store.deletedSecrets) != 0 { + t.Errorf("no deletes expected; got %v", store.deletedSecrets) + } + wantSkipped := []string{"brg", "ttd"} + if !equalSlice(rep.SecretsSkipped, wantSkipped) { + t.Errorf("SecretsSkipped = %v, want %v", rep.SecretsSkipped, wantSkipped) + } +} + +// Scenario 3: org added to desired set → exactly one mint + create. 
+func TestRun_OrgAdded(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "dsb", Environments: []string{"production"}}, // new + }} + minter := &stubMinter{} + store := newStubStore() + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.statusByName["altinn-gitea-runner-brg-secret"] = k8sstate.RegistrationSecretValid + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + managedSecret("altinn-gitea-runner-brg-secret", "brg"), + } + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg", "dsb"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if !equalSlice(minter.calls, []string{"dsb"}) { + t.Errorf("minter calls = %v, want [dsb]", minter.calls) + } + if !equalSlice(rep.SecretsCreated, []string{"dsb"}) { + t.Errorf("SecretsCreated = %v, want [dsb]", rep.SecretsCreated) + } +} + +// Scenario 4: org removed from CDN → its Secret is deleted, ConfigMap reflects. 
+func TestRun_OrgRemoved(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + // brg is gone from CDN + }} + minter := &stubMinter{} + store := newStubStore() + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.statusByName["altinn-gitea-runner-brg-secret"] = k8sstate.RegistrationSecretValid + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + managedSecret("altinn-gitea-runner-brg-secret", "brg"), + } + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if !equalSlice(store.deletedSecrets, []string{"altinn-gitea-runner-brg-secret"}) { + t.Errorf("deletedSecrets = %v", store.deletedSecrets) + } + if !equalSlice(rep.SecretsDeleted, []string{"brg"}) { + t.Errorf("SecretsDeleted = %v, want [brg]", rep.SecretsDeleted) + } +} + +// Scenario 5: org with empty environments → filtered out, no work for it. +func TestRun_FilteredByEmptyEnvironments(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "acn", Environments: nil}, // test org, no envs → filter out + }} + minter := &stubMinter{} + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "acn"}, false) + + if !equalSlice(rep.FilteredNoEnv, []string{"acn"}) { + t.Errorf("FilteredNoEnv = %v, want [acn]", rep.FilteredNoEnv) + } + if !equalSlice(rep.Desired, []string{"ttd"}) { + t.Errorf("Desired = %v, want [ttd]", rep.Desired) + } + if containsString(minter.calls, "acn") { + t.Errorf("acn should not be minted; got calls %v", minter.calls) + } +} + +// Scenario 6: whitelist excludes an otherwise-eligible org. 
+func TestRun_FilteredByWhitelist(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "extra", Environments: []string{"production"}}, // not in whitelist + }} + minter := &stubMinter{} + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg"}, false) + + if !equalSlice(rep.FilteredWhitelist, []string{"extra"}) { + t.Errorf("FilteredWhitelist = %v, want [extra]", rep.FilteredWhitelist) + } + if !equalSlice(rep.Desired, []string{"brg", "ttd"}) { + t.Errorf("Desired = %v, want [brg ttd]", rep.Desired) + } +} + +// Scenario 7: Gitea fails for one org, others succeed; failed org omitted from CM. +func TestRun_GiteaPartialFailure(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: []string{"production"}}, + {Code: "dsb", Environments: []string{"production"}}, + }} + minter := &stubMinter{ + errs: map[string]error{"brg": errors.New("gitea 500")}, + } + store := newStubStore() + + rep := runReconciler(t, src, minter, store, []string{"ttd", "brg", "dsb"}, false) + + if rep.Outcome != OutcomePartial { + t.Errorf("outcome = %v, want partial", rep.Outcome) + } + if len(rep.FailedOrgs) != 1 || rep.FailedOrgs[0].Org != "brg" || rep.FailedOrgs[0].Stage != StageMint { + t.Errorf("FailedOrgs = %v, want [{brg mint ...}]", rep.FailedOrgs) + } + if containsString(rep.SecretsCreated, "brg") { + t.Errorf("brg should not be in SecretsCreated; got %v", rep.SecretsCreated) + } + if !strings.Contains(store.appliedCMData[ConfigMapDataKey], "name: ttd") { + t.Errorf("ConfigMap should include ttd") + } + if strings.Contains(store.appliedCMData[ConfigMapDataKey], "name: brg") { + t.Errorf("ConfigMap should NOT include brg (mint failed)") + } +} + +func TestRun_InvalidExistingSecretIsNotProjected(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", 
Environments: []string{"tt02"}}, + }} + minter := &stubMinter{} + store := newStubStore() + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretInvalid + + rep := runReconciler(t, src, minter, store, []string{"ttd"}, false) + + if rep.Outcome != OutcomePartial { + t.Errorf("outcome = %v, want partial", rep.Outcome) + } + if len(rep.FailedOrgs) != 1 || rep.FailedOrgs[0].Org != "ttd" || rep.FailedOrgs[0].Stage != StageValidate { + t.Errorf("FailedOrgs = %v, want [{ttd validate ...}]", rep.FailedOrgs) + } + if len(minter.calls) != 0 { + t.Errorf("minter should not be called when same-name invalid secret exists; got %v", minter.calls) + } + if got := store.appliedCMData[ConfigMapDataKey]; got != "runners: []\n" { + t.Errorf("ConfigMap body = %q, want empty runner list", got) + } +} + +// --- additional coverage ---------------------------------------------------- + +// Auth failures hit every org with the same PAT — failing fast avoids a +// cascade of identical errors and keeps failure attribution clean. +func TestRun_FatalOnUnauthorizedMint(t *testing.T) { + // Reconciler sorts orgs alphabetically; "aaa" gets minted first. + // If the first org returns ErrUnauthorized we should bail fatally + // before attempting "zzz". 
+ src := &stubSource{orgs: []cdn.Org{ + {Code: "aaa", Environments: []string{"tt02"}}, + {Code: "zzz", Environments: []string{"tt02"}}, + }} + minter := &stubMinter{ + errs: map[string]error{ + "aaa": fmt.Errorf("minting aaa: %w", gitea.ErrUnauthorized), + }, + } + store := newStubStore() + + r, _ := New(Options{ + Source: src, + Minter: minter, + Store: store, + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: []string{"aaa", "zzz"}, + }) + _, err := r.Run(context.Background()) + if err == nil { + t.Fatal("expected fatal error on ErrUnauthorized, got nil") + } + if !errors.Is(err, gitea.ErrUnauthorized) { + t.Errorf("expected wrapped ErrUnauthorized, got %v", err) + } + // zzz should NOT have been attempted — fail-fast short-circuits. + if containsString(minter.calls, "zzz") { + t.Errorf("zzz should not be minted after aaa's 401; got calls %v", minter.calls) + } +} + +func TestRun_FatalOnSourceError(t *testing.T) { + r, _ := New(Options{ + Source: &stubSource{err: errors.New("cdn down")}, + Minter: &stubMinter{}, + Store: newStubStore(), + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: []string{"ttd"}, + }) + _, err := r.Run(context.Background()) + if err == nil { + t.Fatal("expected fatal error, got nil") + } +} + +func TestRun_FatalOnApplyConfigMapError(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{{Code: "ttd", Environments: []string{"tt02"}}}} + store := newStubStore() + store.applyCMErr = errors.New("apiserver hiccup") + + r, _ := New(Options{ + Source: src, + Minter: &stubMinter{}, + Store: store, + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + Whitelist: []string{"ttd"}, + }) + _, err := r.Run(context.Background()) + if err == nil { + t.Fatal("expected fatal error, got nil") + } +} + +func TestRun_SyncAllSkipsWhitelist(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{ + {Code: "ttd", Environments: []string{"tt02"}}, + {Code: "brg", Environments: 
[]string{"production"}}, + }} + r, _ := New(Options{ + Source: src, + Minter: &stubMinter{}, + Store: newStubStore(), + SecretNameFor: secretNameFor, + ConfigMapName: "runner-org-list", + SyncAll: true, + }) + rep, err := r.Run(context.Background()) + if err != nil { + t.Fatalf("Run: %v", err) + } + if !equalSlice(rep.Desired, []string{"brg", "ttd"}) { + t.Errorf("Desired = %v, want [brg ttd]", rep.Desired) + } + if len(rep.FilteredWhitelist) != 0 { + t.Errorf("nothing should be filtered by whitelist when SyncAll=true; got %v", rep.FilteredWhitelist) + } +} + +func TestRun_UnlabelledManagedSecretIsSkippedOnDelete(t *testing.T) { + src := &stubSource{orgs: []cdn.Org{{Code: "ttd", Environments: []string{"tt02"}}}} + store := newStubStore() + store.statusByName["altinn-gitea-runner-ttd-secret"] = k8sstate.RegistrationSecretValid + store.managed = []corev1.Secret{ + managedSecret("altinn-gitea-runner-ttd-secret", "ttd"), + // drift: managed-by label but no org label + {ObjectMeta: metav1.ObjectMeta{Name: "stray", Labels: map[string]string{"app.kubernetes.io/managed-by": "runner-org-sync"}}}, + } + rep := runReconciler(t, src, &stubMinter{}, store, []string{"ttd"}, false) + + if rep.Outcome != OutcomeSuccess { + t.Errorf("outcome = %v, want success", rep.Outcome) + } + if containsString(store.deletedSecrets, "stray") { + t.Errorf("stray secret should not be deleted without org label; got deletes %v", store.deletedSecrets) + } +} + +func TestNew_Validation(t *testing.T) { + cases := []struct { + name string + opts Options + }{ + {"no source", Options{Minter: &stubMinter{}, Store: newStubStore(), SecretNameFor: secretNameFor, ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no minter", Options{Source: &stubSource{}, Store: newStubStore(), SecretNameFor: secretNameFor, ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no store", Options{Source: &stubSource{}, Minter: &stubMinter{}, SecretNameFor: secretNameFor, ConfigMapName: "x", Whitelist: []string{"a"}}}, + {"no 
// equalSlice reports whether a and b hold the same strings with the same
// multiplicities, ignoring order. Both inputs are copied before sorting, so
// the callers' slices are left untouched. A nil slice equals an empty one.
func equalSlice(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}

	left := make([]string, len(a))
	right := make([]string, len(b))
	copy(left, a)
	copy(right, b)
	sort.Strings(left)
	sort.Strings(right)

	for i, v := range left {
		if right[i] != v {
			return false
		}
	}
	return true
}
+type Metrics struct { + ReconcileDuration metric.Float64Histogram + ReconcileRuns metric.Int64Counter + OrgsDiscovered metric.Int64Gauge + OrgsDesired metric.Int64Gauge + OrgsFiltered metric.Int64Counter + SecretsCreated metric.Int64Counter + SecretsDeleted metric.Int64Counter + SecretsSkipped metric.Int64Counter + OrgReconcileErrors metric.Int64Counter + ConfigMapApplied metric.Int64Counter + KedaSecretApplied metric.Int64Counter +} + +// NewMetrics constructs every instrument from the package's Meter (set up +// by ConfigureOTel). Returns an error if any instrument cannot be created; +// in practice this only fires on misconfigured SDKs. +func NewMetrics() (*Metrics, error) { + m := Meter() + mk := func(target *metric.Float64Histogram, name, desc, unit string) error { + h, err := m.Float64Histogram(name, metric.WithDescription(desc), metric.WithUnit(unit)) + if err != nil { + return fmt.Errorf("telemetry: histogram %s: %w", name, err) + } + *target = h + return nil + } + mc := func(target *metric.Int64Counter, name, desc string) error { + c, err := m.Int64Counter(name, metric.WithDescription(desc)) + if err != nil { + return fmt.Errorf("telemetry: counter %s: %w", name, err) + } + *target = c + return nil + } + mg := func(target *metric.Int64Gauge, name, desc string) error { + g, err := m.Int64Gauge(name, metric.WithDescription(desc)) + if err != nil { + return fmt.Errorf("telemetry: gauge %s: %w", name, err) + } + *target = g + return nil + } + + out := &Metrics{} + if err := mk(&out.ReconcileDuration, "runner_org_sync.reconcile.duration", "End-to-end reconcile run duration.", "s"); err != nil { + return nil, err + } + if err := mc(&out.ReconcileRuns, "runner_org_sync.reconcile.runs", "Reconcile run count by outcome."); err != nil { + return nil, err + } + if err := mg(&out.OrgsDiscovered, "runner_org_sync.orgs.discovered", "Orgs returned by the CDN."); err != nil { + return nil, err + } + if err := mg(&out.OrgsDesired, "runner_org_sync.orgs.desired", "Orgs 
after environment + whitelist filter."); err != nil { + return nil, err + } + if err := mc(&out.OrgsFiltered, "runner_org_sync.orgs.filtered", "Orgs filtered out, by reason."); err != nil { + return nil, err + } + if err := mc(&out.SecretsCreated, "runner_org_sync.secrets.created", "Per-org Secrets created this run."); err != nil { + return nil, err + } + if err := mc(&out.SecretsDeleted, "runner_org_sync.secrets.deleted", "Per-org Secrets deleted this run."); err != nil { + return nil, err + } + if err := mc(&out.SecretsSkipped, "runner_org_sync.secrets.skipped", "Per-org Secrets left untouched (already existed)."); err != nil { + return nil, err + } + if err := mc(&out.OrgReconcileErrors, "runner_org_sync.org.reconcile_errors", "Per-org reconcile failures by stage."); err != nil { + return nil, err + } + if err := mc(&out.ConfigMapApplied, "runner_org_sync.configmap.applied", "ConfigMap apply attempts by changed=true|false."); err != nil { + return nil, err + } + if err := mc(&out.KedaSecretApplied, "runner_org_sync.keda_secret.applied", "KEDA PAT Secret apply attempts by changed=true|false and success=true|false."); err != nil { + return nil, err + } + return out, nil +} + +// RecordFiltered increments OrgsFiltered with the given reason attribute. +func (m *Metrics) RecordFiltered(ctx context.Context, reason string, n int) { + if n <= 0 { + return + } + m.OrgsFiltered.Add(ctx, int64(n), metric.WithAttributes(attribute.String("reason", reason))) +} diff --git a/src/runner-org-sync/internal/telemetry/telemetry.go b/src/runner-org-sync/internal/telemetry/telemetry.go new file mode 100644 index 00000000000..cf1b4ef196f --- /dev/null +++ b/src/runner-org-sync/internal/telemetry/telemetry.go @@ -0,0 +1,138 @@ +// Package telemetry configures OpenTelemetry traces and metrics, sets the +// default slog logger, and exposes package-level Tracer/Meter accessors. 
+// +// Mirrors the pattern used by src/Runtime/pdf3/internal/telemetry: callers +// invoke ConfigureOTel once at startup, defer the returned shutdown, and use +// telemetry.Tracer() / telemetry.Meter() anywhere they need an instrument. +// No per-handle struct to thread through call sites — OTel's global +// providers do that work. +package telemetry + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/propagation" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + "go.opentelemetry.io/otel/trace" +) + +const scope = "altinn.studio/runner-org-sync" + +// Tracer returns the package's tracer. Safe to call before ConfigureOTel — +// the OTel SDK's default global provider is a no-op until a real one is +// installed, so the returned tracer always works. +// +//nolint:ireturn // OpenTelemetry intentionally exposes interface-returning accessors. +func Tracer() trace.Tracer { + return otel.Tracer(scope) +} + +// Meter returns the package's meter. Same semantics as Tracer. +// +//nolint:ireturn // OpenTelemetry intentionally exposes interface-returning accessors. +func Meter() metric.Meter { + return otel.Meter(scope) +} + +// ConfigureOTel bootstraps OpenTelemetry (traces + metrics) and sets the +// default slog logger. Always defer the returned shutdown on exit with a +// bounded context — 10s is plenty for our payload sizes. +// +// If OTEL_EXPORTER_OTLP_ENDPOINT is unset (typical for local dev) the OTLP +// exporters are skipped entirely and the global no-op providers continue to +// satisfy Tracer() / Meter() calls. 
+func ConfigureOTel(ctx context.Context, serviceName string) (func(context.Context) error, error) { + if serviceName == "" { + serviceName = "runner-org-sync" + } + + // Default slog handler: JSON to stdout. Keeps `kubectl logs` readable + // for humans and parseable for log aggregators. + slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelInfo, + }))) + + res, err := resource.New(ctx, + resource.WithAttributes(semconv.ServiceName(serviceName)), + resource.WithFromEnv(), // OTEL_RESOURCE_ATTRIBUTES + resource.WithProcessPID(), + resource.WithHost(), + ) + if err != nil { + return nil, fmt.Errorf("telemetry: resource: %w", err) + } + + // Set propagator so any future cross-service call (HTTP/gRPC) preserves + // trace context automatically. Free for us — costs nothing if unused. + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + noop := func(context.Context) error { return nil } + if !otlpEndpointConfigured() { + return noop, nil + } + + var shutdownFuncs []func(context.Context) error + shutdown := func(ctx context.Context) error { + var shutdownErr error + for _, fn := range shutdownFuncs { + shutdownErr = errors.Join(shutdownErr, fn(ctx)) + } + shutdownFuncs = nil + return shutdownErr + } + + traceExp, err := otlptracegrpc.New(ctx) + if err != nil { + return shutdown, fmt.Errorf("telemetry: trace exporter: %w", err) + } + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(traceExp), + sdktrace.WithResource(res), + sdktrace.WithSampler(sdktrace.AlwaysSample()), + ) + otel.SetTracerProvider(tp) + shutdownFuncs = append(shutdownFuncs, tp.Shutdown) + + metricExp, err := otlpmetricgrpc.New(ctx) + if err != nil { + return shutdown, fmt.Errorf("telemetry: metric exporter: %w", err) + } + mp := sdkmetric.NewMeterProvider( + sdkmetric.WithResource(res), + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp, + 
sdkmetric.WithInterval(15*time.Second), + )), + ) + otel.SetMeterProvider(mp) + shutdownFuncs = append(shutdownFuncs, mp.Shutdown) + + return shutdown, nil +} + +func otlpEndpointConfigured() bool { + for _, k := range []string{ + "OTEL_EXPORTER_OTLP_ENDPOINT", + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT", + "OTEL_EXPORTER_OTLP_METRICS_ENDPOINT", + } { + if os.Getenv(k) != "" { + return true + } + } + return false +}