diff --git a/.github/workflows/commit.yml b/.github/workflows/commit.yml index 257f7093..3799a122 100644 --- a/.github/workflows/commit.yml +++ b/.github/workflows/commit.yml @@ -62,7 +62,7 @@ jobs: pip install pdoc pdoc -o /tmp/_html ./mozilla_schema_generator - name: Upload docs artifact - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: docs-html path: /tmp/_html/ @@ -88,7 +88,7 @@ jobs: ls -al /tmp/ ls -al /tmp/_html/ - name: Setup Node.js - uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v6.3.0 + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 with: node-version: '24' - name: Install and configure dependencies diff --git a/.github/workflows/schema-diff.yml b/.github/workflows/schema-diff.yml new file mode 100644 index 00000000..a0542dac --- /dev/null +++ b/.github/workflows/schema-diff.yml @@ -0,0 +1,163 @@ +name: Generated Schema Diff +on: + pull_request: + paths: + - 'mozilla_schema_generator/__main__.py' + - 'mozilla_schema_generator/glean_ping.py' + - 'mozilla_schema_generator/configs/**' + +jobs: + generate-main: + name: Generate schemas from main + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout main + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + ref: main + persist-credentials: false + - name: Add generate_glean.sh from PR HEAD + env: + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + run: | + git fetch --depth=1 origin "$PR_HEAD_SHA" + git checkout "$PR_HEAD_SHA" -- bin/generate_glean.sh + - &setup-python + name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 + with: + python-version: '3.14' + cache: 'pip' + - &cache-jst + name: Cache jsonschema-transpiler + id: jst-cache + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: 
~/.cargo/bin/jsonschema-transpiler + key: jst-2.0.1-${{ runner.os }} + - &install-jst + name: Install jsonschema-transpiler + if: steps.jst-cache.outputs.cache-hit != 'true' + run: cargo install jsonschema-transpiler --version 2.0.1 --locked + - &add-cargo-path + name: Add cargo bin to PATH + run: echo "$HOME/.cargo/bin" >> "$GITHUB_PATH" + - &cache-probes + name: Restore probe cache + uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5 + with: + path: .probe_cache + # Using a coarse key because the actual probes don't matter + key: probe-cache-${{ github.head_ref }} + restore-keys: | + probe-cache- + - &install-reqs + name: Install requirements + run: | + make install-requirements + pip install -e . + - &fetch-mps-metadata + name: Fetch metadata/ from mozilla-pipeline-schemas + run: | + git clone --depth=1 --filter=blob:none --sparse \ + --branch generated-schemas \ + https://github.com/mozilla-services/mozilla-pipeline-schemas.git /tmp/mps + git -C /tmp/mps sparse-checkout set schemas/metadata + cp -r /tmp/mps/schemas/metadata ./metadata + - &generate-schemas + name: Generate schemas + run: bin/generate_glean.sh + - name: Upload main schemas + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: main-schemas + path: schema_out/ + + generate-pr: + name: Generate schemas from PR + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - name: Checkout PR + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + - *setup-python + - *cache-jst + - *install-jst + - *add-cargo-path + - *cache-probes + - *install-reqs + - *fetch-mps-metadata + - *generate-schemas + - name: Upload PR schemas + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: pr-schemas + path: schema_out/ + + schema-diff: + name: Diff PR schemas against main + runs-on: ubuntu-latest + needs: [generate-main, generate-pr] + permissions: 
+ contents: write + pull-requests: write + steps: + - name: Checkout repo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: true + - name: Download main schemas + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: main-schemas + path: /tmp/main-schemas + - name: Download PR schemas + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: pr-schemas + path: /tmp/pr-schemas + - name: Push schemas to ci-schema-diff-test branch + id: push-diff + env: + DIFF_BRANCH: ci-schema-diff-test + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }} + MAIN_SHA: ${{ github.event.pull_request.base.sha }} + run: | + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + + # Start the diff branch from an empty tree so it doesn't have any history + git checkout --orphan "$DIFF_BRANCH" + git rm -rf . > /dev/null 2>&1 || true + git clean -fdx + + # Commit 1: schemas generated from main + mkdir -p schema_out + cp -r /tmp/main-schemas/. schema_out/ + git add schema_out + git commit --allow-empty -m "PR #${PR_NUMBER}: generated schemas from main (${MAIN_SHA:0:8})" + + # Commit 2: schemas generated from PR head + rm -rf schema_out + mkdir -p schema_out + cp -r /tmp/pr-schemas/. schema_out/ + git add -A schema_out + git commit --allow-empty -m "PR #${PR_NUMBER}: generated schemas from ${PR_HEAD_SHA:0:8}" + + git push --force origin "$DIFF_BRANCH" + + DIFF_COMMIT_SHA=$(git rev-parse HEAD) + echo "diff_commit_sha=$DIFF_COMMIT_SHA" >> "$GITHUB_OUTPUT" + { + echo "## Generated Schema Diff Branch" + echo "" + echo "Pushed to [\`$DIFF_BRANCH\`](https://github.com/${GITHUB_REPOSITORY}/tree/${DIFF_BRANCH})." 
+ echo "" + echo "View this PR's schema changes: [commit ${DIFF_COMMIT_SHA:0:7}](https://github.com/${GITHUB_REPOSITORY}/commit/${DIFF_COMMIT_SHA})" + } >> "$GITHUB_STEP_SUMMARY" diff --git a/bin/generate_glean.sh b/bin/generate_glean.sh new file mode 100755 index 00000000..3b4d4546 --- /dev/null +++ b/bin/generate_glean.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Generate glean pings for all apps and write to schema_out/ at the repo root +# The steps here mirror the job to create the generated-schemas branch + +set -euo pipefail + +cd "$(dirname "$(readlink -f "$0")")/.." + +mozilla-schema-generator generate-glean-pings --out-dir schema_out/ --pretty + +for schema in $(find schema_out -name "*.schema.json" -type f); do + bin/metadata_merge metadata/ "$schema" +done + +# Add transpiled BQ schemas +find schema_out -type f -name "*.schema.json" | while read -r fname; do + bq_out=${fname/schema.json/bq} + mkdir -p "$(dirname "$bq_out")" + jsonschema-transpiler \ + --resolve drop \ + --type bigquery \ + --normalize-case \ + --force-nullable \ + --tuple-struct \ + "$fname" > "$bq_out" +done + +mozilla-schema-generator generate-glean-pings --pretty --generic-schema --out-dir schema_out diff --git a/mozilla_schema_generator/configs/glean_v2_allowlist.yaml b/mozilla_schema_generator/configs/glean_v2_allowlist.yaml index 7352f651..f71fe024 100644 --- a/mozilla_schema_generator/configs/glean_v2_allowlist.yaml +++ b/mozilla_schema_generator/configs/glean_v2_allowlist.yaml @@ -60,9 +60,9 @@ # net-thunderbird-android-daily: # - metrics # - health -# org-mozilla-connect-firefox: -# - metrics -# - health +#org-mozilla-connect-firefox: +#- metrics +#- health # org-mozilla-fenix: # - metrics # - sync diff --git a/mozilla_schema_generator/validate_bigquery.py b/mozilla_schema_generator/validate_bigquery.py index 0f47a037..39e15442 100755 --- a/mozilla_schema_generator/validate_bigquery.py +++ b/mozilla_schema_generator/validate_bigquery.py @@ -59,7 +59,7 @@ def copy_schemas(head: str, 
repository: Path, artifact: Path) -> Path: evolution checks.""" src = Path(repository) repo = Repo(repository) - dst = Path(artifact) / repo.rev_parse(head).name_rev.replace(" ", "_") + dst = Path(artifact) / repo.rev_parse(head).hexsha dst.mkdir(parents=True, exist_ok=True) schemas = sorted(src.glob("**/*.bq")) if not schemas: diff --git a/tests/resources/mozilla-pipeline-schemas b/tests/resources/mozilla-pipeline-schemas index 0657218c..b5327b51 160000 --- a/tests/resources/mozilla-pipeline-schemas +++ b/tests/resources/mozilla-pipeline-schemas @@ -1 +1 @@ -Subproject commit 0657218c174c7cef571017ff9090bb1b8ebb726e +Subproject commit b5327b512f06cb7693649d5d03879ca7c73bea26 diff --git a/tests/test_validate_bigquery.py b/tests/test_validate_bigquery.py index 3770ebdf..438a798b 100644 --- a/tests/test_validate_bigquery.py +++ b/tests/test_validate_bigquery.py @@ -83,9 +83,7 @@ def test_copy_schemas(tmp_path, tmp_git): assert len(bq) > 0, "no bq schemas detected" assert len(bq) == len(txt) repo = Repo(tmp_git) - # the dst name encodes the revision, we should always be able to use the - # first 6 characters to find commit in the repository. - assert repo.rev_parse(dst.name[:6]) == repo.head.commit + assert repo.rev_parse(dst.name) == repo.head.commit def test_checkout_copy_schema_revisions(tmp_path, tmp_git): @@ -94,8 +92,8 @@ def test_checkout_copy_schema_revisions(tmp_path, tmp_git): base_ref = "generated-schemas~10" head, base = checkout_copy_schemas_revisions(head_ref, base_ref, tmp_git, tmp_path) assert head and base - assert repo.rev_parse(head.name[:8]) == repo.rev_parse(head_ref) - assert repo.rev_parse(base.name[:8]) == repo.rev_parse(base_ref) + assert repo.rev_parse(head.name) == repo.rev_parse(head_ref) + assert repo.rev_parse(base.name) == repo.rev_parse(base_ref) def test_checkout_copy_schema_revisions_fails_dirty(tmp_path, tmp_git):