feat(bigquery): Add pushdown_deny_usernames and pushdown_allow_usernames for server-side user filtering #48113
Workflow file for this run
name: Docker Build, Scan, Test
on:
  workflow_dispatch:
    inputs:
      profileName:
        description: "Profile name for the smoke-test. Defaults to quickstart-consumers if not specified"
        required: false
        default: "quickstart-consumers"
        type: string
  push:
    branches:
      - master
      - releases/**
  pull_request:
    types: [opened, synchronize, reopened, labeled]
    branches:
      - "**"
  release:
    types: [published]
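# To run this workflow manually with a custom quickstart profile (illustrative gh CLI invocation):
#   gh workflow run "Docker Build, Scan, Test" -f profileName=quickstart-consumers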
concurrency:
  # Use `github.run_id` (a unique value) instead of `github.ref` so that in-progress runs are
  # cancelled only for PRs, never on master -- keeping master runs makes reproducing issues easier.
  # `github.event.action == 'labeled'` is part of the group to differentiate runs triggered by
  # adding a label -- most labels are no-ops except for `depot`.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'labeled' }}
  cancel-in-progress: true
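  # Example group value for a PR: "Docker Build, Scan, Test-1234-false". On master, run_id makes
  # every group unique, so in-progress master runs are never cancelled.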
env:
  DOCKER_REGISTRY: "acryldata"
  PROFILE_NAME: "${{ github.event.inputs.profileName || 'quickstart-consumers' }}"
  DOCKER_CACHE: "DEPOT"
  DEPOT_PROJECT_ID: "s0gr1cr3jd"
  HAS_DEPOT_LABEL: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'depot') }}
  # Include Alpine variants for releases, or when the PR has the 'build-alpine-variant' label
  INCLUDE_ALPINE_VARIANTS: ${{ github.event_name == 'release' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'build-alpine-variant')) }}
  IS_FORK: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }}
  DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
permissions:
  contents: read
  id-token: write
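# `id-token: write` lets jobs request GitHub's OIDC token for keyless authentication to external
# services (presumably depot here, which supports OIDC trust relationships from GitHub Actions).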
jobs:
  setup:
    runs-on: depot-ubuntu-24.04-small
    if: ${{ github.event_name != 'pull_request' || github.event.action != 'labeled' || github.event.label.name == 'depot' }}
    outputs:
      # TODO: Many of the vars below should not be required anymore.
      tag: ${{ steps.tag.outputs.tag }}
      slim_tag: ${{ steps.tag.outputs.slim_tag }}
      full_tag: ${{ steps.tag.outputs.full_tag }}
      short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
      unique_tag: ${{ steps.tag.outputs.unique_tag }}
      unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
      unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
      docker-login: ${{ steps.docker-login.outputs.docker-login }}
      publish: ${{ steps.publish.outputs.publish }}
      pr-publish: ${{ steps.pr-publish.outputs.publish }}
      python_release_version: ${{ steps.tag.outputs.python_release_version }}
      branch_name: ${{ steps.tag.outputs.branch_name }}
      repository_name: ${{ steps.tag.outputs.repository_name }}
      frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name != 'pull_request' }}
      actions_change: ${{ steps.ci-optimize.outputs.actions-change == 'true' || github.event_name != 'pull_request' }}
      ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' || github.event_name != 'pull_request' }}
      ingestion_base_change: ${{ steps.ci-optimize.outputs.ingestion-base-change == 'true' }}
      backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' || github.event_name != 'pull_request' }}
      frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
      ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
      backend_only: ${{ steps.ci-optimize.outputs.backend-only == 'true' }}
      kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
      mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
      postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
      elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
      smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
      java_client_change: ${{ steps.ci-optimize.outputs.java-client-change == 'true' || github.event_name != 'pull_request' }}
      integrations_service_change: "false"
      datahub_executor_change: "false"
      build_runner_type: ${{ steps.set-runner.outputs.build_runner_type }}
      test_runner_type: ${{ steps.set-runner.outputs.test_runner_type }}
      test_runner_type_small: ${{ steps.set-runner.outputs.test_runner_type_small }}
      use_depot_cache: ${{ steps.set-runner.outputs.use_depot_cache }}
      uv_cache_key: ${{ steps.uv-cache-key.outputs.uv_cache_key }}
      uv_cache_key_prefix: ${{ steps.uv-cache-key.outputs.uv_cache_key_prefix }}
      yarn_cache_key: ${{ steps.yarn-cache-key.outputs.yarn_cache_key }}
      yarn_cache_key_prefix: ${{ steps.yarn-cache-key.outputs.yarn_cache_key_prefix }}
    steps:
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
      - name: Compute Tag
        id: tag
        env:
          GITHUB_REF_FALLBACK: ${{ github.event_name == 'release' && format('refs/tags/{0}', github.event.release.tag_name) || github.ref }}
          GITHUB_EVENT_NAME: ${{ github.event_name }}
        run: |
          source .github/scripts/docker_helpers.sh
          {
            echo "short_sha=${SHORT_SHA}"
            echo "tag=$(get_tag)"
            echo "slim_tag=$(get_tag_slim)"
            echo "full_tag=$(get_tag_full)"
            echo "unique_tag=$(get_unique_tag)"
            echo "unique_slim_tag=$(get_unique_tag_slim)"
            echo "unique_full_tag=$(get_unique_tag_full)"
            echo "python_release_version=$(get_python_docker_release_v)"
            echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
            echo "repository_name=${GITHUB_REPOSITORY#*/}"
          } >> "$GITHUB_OUTPUT"
      - name: Check whether docker login is possible
        id: docker-login
        env:
          ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
        run: |
          echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
          echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"
      - name: Check whether publishing enabled
        id: publish
        env:
          ENABLE_PUBLISH: >-
            ${{
              (github.event_name == 'release' || ((github.event_name == 'workflow_dispatch' || github.event_name == 'push') && github.ref == 'refs/heads/master'))
              && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
            }}
        run: |
          echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
          echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
      - name: Check whether PR publishing enabled
        id: pr-publish
        env:
          ENABLE_PUBLISH: >-
            ${{
              (github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'publish') || contains(github.event.pull_request.labels.*.name, 'publish-docker')))
              && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
            }}
        run: |
          echo "Enable PR publish: ${{ env.ENABLE_PUBLISH }}"
          echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
      - uses: ./.github/actions/ci-optimization
        id: ci-optimize
      - name: Determine runner type
        id: set-runner
        # This needs to handle two scenarios:
        # 1. Running on a PR from a fork. We use GitHub runners, unless the "depot" label exists -- in
        #    which case we run it on depot. Note: concurrency is lower when using GitHub runners, queue
        #    times can be longer, and test time is longer due to fewer parallel jobs.
        # 2. Running on a PR from a branch in the datahub-project org, and push/schedule events on master.
        #    Depot is used here for remote container builds in base_build and also for all runners. Depot
        #    runners support unlimited concurrency, and hence short queue times and higher parallelism of
        #    smoke tests.
        run: |
          if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.IS_FORK }}" == "false" ]]; then
            echo "build_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            echo "test_runner_type_small=depot-ubuntu-24.04-small" >> "$GITHUB_OUTPUT"
            echo "use_depot_cache=true" >> "$GITHUB_OUTPUT"
          else
            echo "build_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
            if [[ "${{ env.HAS_DEPOT_LABEL }}" == "true" ]]; then
              echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            else
              echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
            fi
            echo "test_runner_type_small=ubuntu-latest" >> "$GITHUB_OUTPUT"
            echo "use_depot_cache=false" >> "$GITHUB_OUTPUT"
            # publishing is currently only supported via depot
          fi
      - name: Compute UV Cache Key
        id: uv-cache-key
        run: |
          echo "uv_cache_key=docker-unified-${{ runner.os }}-uv-${{ hashFiles(
            './datahub-actions/pyproject.toml',
            './datahub-actions/setup.py',
            './smoke-test/requirements.txt',
            './smoke-test/pyproject.toml',
            './metadata-ingestion/pyproject.toml',
            './metadata-ingestion/setup.py') }}" >> "$GITHUB_OUTPUT"
          echo "uv_cache_key_prefix=docker-unified-${{ runner.os }}-uv-" >> "$GITHUB_OUTPUT"
      - name: Compute Yarn Cache Key
        id: yarn-cache-key
        run: |
          echo "yarn_cache_key=docker-unified-${{ runner.os }}-yarn-${{ hashFiles('./smoke-test/tests/cypress/yarn.lock', './datahub-web-react/yarn.lock') }}" >> "$GITHUB_OUTPUT"
          echo "yarn_cache_key_prefix=docker-unified-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"
  smoke_test_lint:
    name: Lint on smoke tests
    runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
    needs: setup
    if: ${{ needs.setup.outputs.smoke_test_change == 'true' }}
    steps:
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.uv_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.yarn_cache_key_prefix }}
      - name: Run lint on smoke test
        run: |
          python ./.github/scripts/check_python_package.py
          ./gradlew :smoke-test:pythonLint
          ./gradlew :smoke-test:cypressLint
  base_build:
    name: Build all images
    runs-on: ${{ needs.setup.outputs.build_runner_type }}
    needs: setup
    if: ${{ needs.setup.outputs.use_depot_cache == 'true' }} # On fork, the smoke test job does the build since the depot cache is not available
    outputs:
      build_id: ${{ steps.capture-build-id.outputs.build_id }}
      matrix: ${{ steps.capture-build-id.outputs.matrix }}
    steps:
      - name: Set up JDK 17
        uses: actions/setup-java@v5
        with:
          distribution: "zulu"
          java-version: 17
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.uv_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.yarn_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.gradle/wrapper
            ~/.gradle/caches/modules-2
            ~/.gradle/caches/jars-*
            ~/.gradle/caches/transforms-*
          key: gradle-plugins-cache
          restore-keys: |
            gradle-plugins-cache
      - name: Set up Depot CLI
        if: ${{ env.DOCKER_CACHE == 'DEPOT' }}
        uses: depot/setup-action@v1
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
        with:
          checkout-head-only: false
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ needs.setup.outputs.docker-login == 'true' }}
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
      - name: Build all Images (For Smoke tests)
        if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
        # If not publishing, a subset of images required for smoke tests is sufficient.
        # Use buildImagesAll for workflow_dispatch, otherwise buildImagesQuickstart.
        run: |
          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            # If triggered via workflow_dispatch, this can run other quickstart variants, so let's build
            # all images to allow that. We still don't need matrixed builds since this is for smoke tests only.
            BUILD_TASK=":docker:buildImagesAll"
          else
            BUILD_TASK=":docker:buildImagesQuickstart"
          fi
          ./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PincludeAlpineVariants=${{ env.INCLUDE_ALPINE_VARIANTS }}
      - name: Build all Images (Publish)
        if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        # Since this is for publishing, build all images, not just those needed for smoke tests. Head
        # images and releases are published only if tests pass; pr-publish publishes images without
        # waiting for tests to pass.
        run: |
          ./gradlew :docker:buildImagesAll -PmatrixBuild=true -Ptag=${{ needs.setup.outputs.tag }} -PshaTag=${{ needs.setup.outputs.short_sha }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PdockerPush=${{ needs.setup.outputs.pr-publish }} -PincludeAlpineVariants=${{ env.INCLUDE_ALPINE_VARIANTS }}
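      # Note: -PdockerPush evaluates to true only for pr-publish, so only PR-labeled builds push inline
      # here; the tests-gated push for master/releases happens later in the publish_images job.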
      - name: Capture build Id
        id: capture-build-id
        run: |
          pip install jq
          DEPOT_BUILD_ID=$(jq -r '.["depot.build"]?.buildID' "${{ github.workspace }}/build/build-metadata.json")
          echo "build_id=${DEPOT_BUILD_ID}" >> "$GITHUB_OUTPUT"
          echo "matrix=$(jq -c '{"target":.["depot.build"].targets}' "${{ github.workspace }}/build/build-metadata.json")" >> "$GITHUB_OUTPUT"
      - name: Save build Metadata
        if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: build-metadata-${{ needs.setup.outputs.tag }}
          path: |
            ${{ github.workspace }}/build/build-metadata.json
            ${{ github.workspace }}/build/bake-spec-allImages.json
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' }}
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' }}
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' }}
        with:
          path: |
            ~/.gradle/wrapper
            ~/.gradle/caches/modules-2
            ~/.gradle/caches/jars-*
            ~/.gradle/caches/transforms-*
          key: gradle-plugins-cache
  scan_images:
    permissions:
      contents: read # for actions/checkout to fetch code
      security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
      actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
    name: Scan images for vulnerabilities
    runs-on: depot-ubuntu-24.04
    needs: [setup, base_build]
    if: ${{ needs.setup.outputs.publish == 'true' }}
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.base_build.outputs.matrix) }}
    steps:
      - name: Checkout # adding checkout step just to make trivy upload happy
        uses: acryldata/sane-checkout-action@v4
      - id: download_image
        name: Download images from depot
        if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
        run: |
          depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}" --target "${{ matrix.target }}"
          docker images
          echo "docker_image=$(docker images --format '{{.Repository}}:{{.Tag}}' | grep "${{ needs.setup.outputs.tag }}")" >> "$GITHUB_OUTPUT"
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@… # version pin obscured in the source; restore the repo's pin here
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
          TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
        with:
          image-ref: ${{ steps.download_image.outputs.docker_image }}
          format: "template"
          template: "@/contrib/sarif.tpl"
          output: "trivy-results.sarif"
          severity: "CRITICAL,HIGH"
          ignore-unfixed: true
          vuln-type: "os,library"
          trivy-config: "./trivy.yaml"
      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v4
        with:
          sarif_file: "trivy-results.sarif"
  smoke_test_matrix:
    runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
    needs: setup
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      cypress_batch_count: ${{ steps.set-batch-count.outputs.cypress_batch_count }}
      python_batch_count: ${{ steps.set-batch-count.outputs.python_batch_count }}
    steps:
      - id: set-batch-count
        # Tests are split simply to achieve the configured number of batches for parallelization. These
        # counts may need to be increased as newly added tests push the duration to the point where an
        # additional parallel batch helps.
        # python_batch_count splits the pytests in smoke-test (batches of actual test functions);
        # cypress_batch_count splits the collection of cypress test specs into batches.
        run: |
          if [[ "${{ env.IS_FORK }}" == "true" ]]; then
            echo "cypress_batch_count=5" >> "$GITHUB_OUTPUT"
            echo "python_batch_count=3" >> "$GITHUB_OUTPUT"
          else
            echo "cypress_batch_count=8" >> "$GITHUB_OUTPUT"
            echo "python_batch_count=7" >> "$GITHUB_OUTPUT"
          fi
      - id: set-matrix
        # For m python batches and n cypress batches, we need a test matrix of (python x m) + (cypress x n).
        # While GitHub Actions matrix generation can handle the two parts individually, there isn't a way
        # to use two generated matrices for the same job. So, produce the combined matrix with scripting
        # and use the include directive to add it to the test matrix.
        run: |
          python_batch_count=${{ steps.set-batch-count.outputs.python_batch_count }}
          python_matrix='{"test_strategy":"pytests","batch":"0","batch_count":"'"$python_batch_count"'"}'
          for ((i=1;i<python_batch_count;i++)); do
            python_matrix="$python_matrix"',{"test_strategy":"pytests","batch_count":"'"$python_batch_count"'","batch":"'"$i"'"}'
          done
          cypress_batch_count=${{ steps.set-batch-count.outputs.cypress_batch_count }}
          cypress_matrix='{"test_strategy":"cypress","batch":"0","batch_count":"'"$cypress_batch_count"'"}'
          for ((i=1;i<cypress_batch_count;i++)); do
            cypress_matrix="$cypress_matrix"',{"test_strategy":"cypress","batch_count":"'"$cypress_batch_count"'","batch":"'"$i"'"}'
          done
          includes=''
          if [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' || "${{ needs.setup.outputs.publish }}" == 'true' ]]; then
            includes="$python_matrix,$cypress_matrix"
          elif [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
            includes="$cypress_matrix"
          elif [[ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]]; then
            includes="$python_matrix"
          fi
          # No space before the closing brace, so the downstream '{"include":[]}' comparison matches exactly.
          echo "matrix={\"include\":[$includes]}" >> "$GITHUB_OUTPUT"
  smoke_test:
    name: Run Smoke Tests (${{ matrix.test_strategy }}, Batch ${{ matrix.batch }}/${{ matrix.batch_count }})
    runs-on: ${{ needs.setup.outputs.test_runner_type }}
    needs: [setup, smoke_test_matrix, base_build]
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.matrix || '{"include":[]}') }}
    if: ${{ always() && !failure() && !cancelled() && needs.smoke_test_matrix.outputs.matrix != '' && needs.smoke_test_matrix.outputs.matrix != '{"include":[]}' }}
    env:
      # TODO Chakru: Review if required
      MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
      MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
    steps:
      - name: Free up disk space
        if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
        run: |
          sudo apt-get remove 'dotnet-*' azure-cli || true
          sudo rm -rf /usr/local/.ghcup || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /usr/share/swift || true
          sudo rm -rf /usr/local/julia* || true
          sudo rm -rf /usr/local/share/powershell || true
          sudo rm -rf /usr/share/miniconda || true
          sudo rm -rf /usr/local/lib/android/ || true
          sudo docker system prune -a -f || true
          df -h
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.uv_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.yarn_cache_key_prefix }}
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
        with:
          checkout-head-only: false
      - name: Set up Depot CLI
        if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
        uses: depot/setup-action@v1
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Detect workflow retry
        id: retry-detection
        run: |
          if [[ "${{ github.run_attempt }}" -gt 1 ]]; then
            echo "This is retry attempt ${{ github.run_attempt }}"
            echo "is_retry=true" >> "$GITHUB_OUTPUT"
            PREVIOUS_ATTEMPT=$(( ${{ github.run_attempt }} - 1 ))
            echo "previous_attempt=${PREVIOUS_ATTEMPT}" >> "$GITHUB_OUTPUT"
          else
            echo "This is the first attempt"
            echo "is_retry=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Download previous test results
        if: steps.retry-detection.outputs.is_retry == 'true'
        id: download-artifacts
        continue-on-error: true
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          set +e
          echo "Downloading artifacts from run ${{ github.run_id }}, attempt ${{ steps.retry-detection.outputs.previous_attempt }}"
          # Create directory for previous results
          mkdir -p "${{ github.workspace }}/previous-test-results"
          # Get the artifact ID for this batch's test results
          ARTIFACT_NAME="Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }}"
          echo "Looking for artifact: ${ARTIFACT_NAME}"
          # Query artifacts for this workflow run
          ARTIFACT_ID=$(gh api "repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts" \
            --jq ".artifacts[] | select(.name == \"${ARTIFACT_NAME}\") | .id" | head -1)
          if [[ -z "$ARTIFACT_ID" ]]; then
            echo "No artifact found for batch ${{ matrix.batch }}"
            echo "download_success=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "Found artifact ID: ${ARTIFACT_ID}"
          # Download and extract the artifact
          cd "${{ github.workspace }}/previous-test-results"
          gh api "repos/${{ github.repository }}/actions/artifacts/${ARTIFACT_ID}/zip" > artifact.zip
          unzip -q artifact.zip
          # Verify we got XML files
          if [[ "${{ matrix.test_strategy }}" == "cypress" ]]; then
            # Cypress XMLs are in smoke-test/tests/cypress/build/smoke-test-results/
            if find . -path "*/smoke-test-results/cypress-test-*.xml" -print -quit | grep -q .; then
              echo "Successfully downloaded cypress test results"
              echo "download_success=true" >> "$GITHUB_OUTPUT"
            else
              echo "No cypress test XML files found in artifact"
              echo "download_success=false" >> "$GITHUB_OUTPUT"
            fi
          else
            # Pytest XMLs are in smoke-test/junit.*.xml
            if find . -path "*/junit*.xml" -print -quit | grep -q .; then
              echo "Successfully downloaded pytest test results"
              echo "download_success=true" >> "$GITHUB_OUTPUT"
            else
              echo "No pytest XML files found in artifact"
              echo "download_success=false" >> "$GITHUB_OUTPUT"
            fi
          fi
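      # The parse_failed_*.py helpers below signal via exit code (mirrored in the case statements):
      # 0 = failures found (retry just those), 2 = previous attempt fully passed (skip the batch),
      # 3 = no usable results, anything else = parse error (fall back to running all tests).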
      - name: Parse failed Cypress tests
        if: |
          steps.retry-detection.outputs.is_retry == 'true' &&
          matrix.test_strategy == 'cypress' &&
          steps.download-artifacts.outputs.download_success == 'true'
        id: parse-cypress-failures
        run: |
          set +e
          OUTPUT_FILE="${{ github.workspace }}/failed-tests-batch-${{ matrix.batch }}.txt"
          python3 .github/scripts/parse_failed_cypress_tests.py \
            --input-dir "${{ github.workspace }}/previous-test-results" \
            --output "${OUTPUT_FILE}"
          EXIT_CODE=$?
          case $EXIT_CODE in
            0)
              echo "parse_result=has_failures" >> "$GITHUB_OUTPUT"
              echo "filtered_tests_file=${OUTPUT_FILE}" >> "$GITHUB_OUTPUT"
              echo "Will retry $(wc -l < "${OUTPUT_FILE}") failed test(s)"
              ;;
            2)
              echo "parse_result=all_passed" >> "$GITHUB_OUTPUT"
              echo "All tests passed in previous attempt - will skip batch"
              ;;
            3)
              echo "parse_result=no_artifacts" >> "$GITHUB_OUTPUT"
              echo "No test results found - will run all tests"
              ;;
            *)
              echo "parse_result=error" >> "$GITHUB_OUTPUT"
              echo "Error parsing test results - will run all tests"
              ;;
          esac
      - name: Parse failed pytest modules
        if: |
          steps.retry-detection.outputs.is_retry == 'true' &&
          matrix.test_strategy == 'pytests' &&
          steps.download-artifacts.outputs.download_success == 'true'
        id: parse-pytest-failures
        run: |
          set +e
          OUTPUT_FILE="${{ github.workspace }}/failed-modules-batch-${{ matrix.batch }}.txt"
          python3 .github/scripts/parse_failed_pytest_tests.py \
            --input-dir "${{ github.workspace }}/previous-test-results" \
            --output "${OUTPUT_FILE}"
          EXIT_CODE=$?
          case $EXIT_CODE in
            0)
              echo "parse_result=has_failures" >> "$GITHUB_OUTPUT"
              echo "filtered_tests_file=${OUTPUT_FILE}" >> "$GITHUB_OUTPUT"
              echo "Will retry $(wc -l < "${OUTPUT_FILE}") failed module(s)"
              ;;
            2)
              echo "parse_result=all_passed" >> "$GITHUB_OUTPUT"
              echo "All tests passed in previous attempt - will skip batch"
              ;;
            3)
              echo "parse_result=no_artifacts" >> "$GITHUB_OUTPUT"
              echo "No test results found - will run all tests"
              ;;
            *)
              echo "parse_result=error" >> "$GITHUB_OUTPUT"
              echo "Error parsing test results - will run all tests"
              ;;
          esac
      - name: Clean up downloaded artifacts
        if: steps.retry-detection.outputs.is_retry == 'true'
        run: |
          # Cleaning up downloaded test results to prevent contamination of current run
          rm -rf "${{ github.workspace }}/previous-test-results"
      - name: Skip batch if all tests passed
        if: |
          steps.parse-cypress-failures.outputs.parse_result == 'all_passed' ||
          steps.parse-pytest-failures.outputs.parse_result == 'all_passed'
        run: |
          echo "✓ All tests passed in previous attempt for ${{ matrix.test_strategy }} batch ${{ matrix.batch }}"
          echo "Skipping this batch to optimize CI time (Docker images, quickstart, and tests)"
          exit 0
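      # The step above only logs the decision; the actual skip is enforced by the
      # parse_result != 'all_passed' guards on the "Smoke test" step below.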
      - uses: gradle/actions/setup-gradle@v4
        if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ needs.setup.outputs.docker-login == 'true' }}
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
      - name: Disk Space Analysis
        run: |
          echo "=== Disk Usage Overview ==="
          df -h
          echo -e "\n=== Docker Disk Usage ==="
          docker system df -v
      - name: build images
        if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
        run: |
          ./gradlew :docker:buildImagesQuickstartDebugConsumers -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PincludeAlpineVariants=${{ env.INCLUDE_ALPINE_VARIANTS }}
          docker images
        env:
          DOCKER_CACHE: GITHUB
      - name: pull images from depot
        if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
        run: |
          depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
          docker images
      - name: Disk Space Analysis
        run: |
          echo "=== Disk Usage Overview ==="
          df -h
          echo -e "\n=== Docker Disk Usage ==="
          docker system df -v
      - name: run quickstart
        env:
          DATAHUB_TELEMETRY_ENABLED: false
          DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
          DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
          ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
          ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
        run: |
          # Quickstart uses the PROFILE_NAME env var, if defined, to pick the profile to start. Defaults to quickstart-consumers.
          ./smoke-test/run-quickstart.sh
      - name: Disk Check
        run: df -h . && docker images
      - name: Disable ES Disk Threshold
        run: |
          curl -XPUT "http://localhost:9200/_cluster/settings" \
            -H 'Content-Type: application/json' -d '{
              "persistent": {
                "cluster": {
                  "routing": {
                    "allocation.disk.threshold_enabled": false
                  }
                }
              }
            }'
      - name: Install dependencies
        run: ./metadata-ingestion/scripts/install_deps.sh
      - name: Build datahub cli
        run: |
          ./gradlew :metadata-ingestion:install
      - name: Smoke test
        if: |
          steps.parse-cypress-failures.outputs.parse_result != 'all_passed' &&
          steps.parse-pytest-failures.outputs.parse_result != 'all_passed'
        env:
          RUN_QUICKSTART: false
          DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
          CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
          CLEANUP_DATA: "false"
          TEST_STRATEGY: ${{ matrix.test_strategy }}
          BATCH_COUNT: ${{ matrix.batch_count }}
          BATCH_NUMBER: ${{ matrix.batch }}
          FILTERED_TESTS: ${{ steps.parse-cypress-failures.outputs.filtered_tests_file || steps.parse-pytest-failures.outputs.filtered_tests_file || '' }}
        run: |
          if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
            echo "=========================================="
            if [[ "${{ matrix.test_strategy }}" == "cypress" ]]; then
              echo "RETRY MODE: Running only failed Cypress tests"
            else
              echo "RETRY MODE: Running only failed pytest modules"
            fi
            echo "=========================================="
            echo "Failed items to retry:"
            cat "$FILTERED_TESTS"
            echo "=========================================="
          elif [[ "${{ steps.retry-detection.outputs.is_retry }}" == "true" ]]; then
            echo "RETRY MODE: Running all tests (fallback)"
          fi
          echo "$DATAHUB_VERSION"
          ./gradlew --stop
          ./smoke-test/smoke.sh
      - name: Java SDK V2 Integration Tests
        if: ${{ (needs.setup.outputs.backend_change == 'true' || needs.setup.outputs.java_client_change == 'true') && matrix.batch == '0' }}
        env:
          DATAHUB_SERVER: http://localhost:8080
          ADMIN_USERNAME: datahub
          ADMIN_PASSWORD: datahub
        run: |
          echo "Running Java SDK V2 integration tests against running DataHub instance..."
          ./gradlew :metadata-integration:java:datahub-client:test --tests "*Integration*"
      - name: Upload Java SDK V2 coverage to Codecov
        if: ${{ always() && (needs.setup.outputs.backend_change == 'true' || needs.setup.outputs.java_client_change == 'true') && matrix.batch == '0' }}
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          directory: ./build/coverage-reports/metadata-integration/java/datahub-client/
          flags: metadata-integration
          name: java-sdk-v2-integration
          fail_ci_if_error: false
          verbose: true
          override_branch: ${{ github.head_ref || github.ref_name }}
      - name: Disk Check
        run: df -h . && docker images
      - name: store logs
        if: failure()
        run: |
          docker ps -a
          TEST_STRATEGY="-${{ matrix.test_strategy }}-${{ matrix.batch }}"
          source .github/scripts/docker_logs.sh
      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: docker-logs-${{ matrix.test_strategy }}-${{ matrix.batch }}
          path: "docker_logs/*.log"
          retention-days: 5
      - name: Upload screenshots
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: cypress-snapshots-${{ matrix.test_strategy }}-${{ matrix.batch }}
          path: smoke-test/tests/cypress/cypress/screenshots/
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }}
          path: |
            **/build/reports/tests/test/**
            **/build/test-results/test/**
            **/smoke-test-results/cypress-test-*.xml
            **/junit.*.xml
            !**/binary/**
      - name: Send failed test metrics to PostHog
        if: failure()
        continue-on-error: true
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
        run: |
          if [ -z "$POSTHOG_API_KEY" ]; then
            echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
            exit 0
          fi
          TEMP_DIR=$(mktemp -d)
          mkdir -p "$TEMP_DIR/test-results"
          find . -name "*.xml" -path "*/build/test-results/*" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          find . -name "cypress-test-*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          find . -name "junit.*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          python3 .github/scripts/send_failed_tests_to_posthog.py \
            --input-dir "$TEMP_DIR/test-results" \
            --posthog-api-key "$POSTHOG_API_KEY" \
            --posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
            --repository "${{ github.repository }}" \
            --workflow-name "${{ github.workflow }}" \
            --branch "${{ github.head_ref || github.ref_name }}" \
            --run-id "${{ github.run_id }}" \
            --run-attempt "${{ github.run_attempt }}" \
            --batch "${{ matrix.batch }}" \
            --batch-count "${{ strategy.job-total }}" \
            --test-strategy "${{ matrix.test_strategy }}"
          rm -rf "$TEMP_DIR"
      - name: Upload test results to Codecov
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          override_branch: ${{ github.head_ref || github.ref_name }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
        # The cache does not need to be saved by all the parallel workers; its contents do not depend on the tests.
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
  publish_images:
    name: Push images after tests pass
    runs-on: ${{ needs.setup.outputs.test_runner_type_small || 'ubuntu-latest' }}
    needs: [setup, smoke_test, base_build]
    if: ${{ always() && !failure() && !cancelled() && needs.setup.result != 'skipped' }}
    steps:
      - name: Check if tests have passed
        id: tests_passed
        run: |
          # Check the overall result of the matrix job.
          # Matrix jobs can have mixed results, so we check for any failures.
          if [[ "${{ needs.smoke_test.result }}" == "failure" ]]; then
            echo "Smoke tests failed, skipping image pushing"
            echo "tests_passed=false" >> "$GITHUB_OUTPUT"
            exit 1
          elif [[ "${{ needs.smoke_test.result }}" == "cancelled" ]]; then
            echo "Smoke tests were cancelled, skipping image pushing"
            echo "tests_passed=false" >> "$GITHUB_OUTPUT"
            exit 1
          else
            echo "Smoke tests completed successfully, proceeding with image pushing"
            echo "tests_passed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Set up Depot CLI
        if: ${{ steps.tests_passed.outputs.tests_passed == 'true' && needs.setup.outputs.use_depot_cache == 'true' }}
        uses: depot/setup-action@v1
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ steps.tests_passed.outputs.tests_passed == 'true' && needs.setup.outputs.docker-login == 'true' }}
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
      - name: Download build Metadata
        if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        uses: actions/download-artifact@v6
        with:
          name: build-metadata-${{ needs.setup.outputs.tag }}
          path: ${{ github.workspace }}/build
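      # `depot bake --print` re-renders the bake spec saved by base_build without building anything; jq
      # flattens it into one {target, tag} pair per line so each image can be pushed from depot's build
      # cache under its final tag, with no rebuild.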
      - name: Push images from depot builder
        if: ${{ steps.tests_passed.outputs.tests_passed == 'true' && needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
        run: |
          set -euo pipefail
          depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print \
            | jq -c '.target | to_entries | map({target: .key, tags: .value.tags[]})' \
            | jq -c '.[]' \
            | while IFS= read -r line; do
                TARGET=$(echo "$line" | jq -r '.target')
                TAG=$(echo "$line" | jq -r '.tags')
                depot push --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}" --target "$TARGET" --tag "$TAG"
              done
  deploy_datahub_head:
    name: Deploy to Datahub HEAD
    runs-on: ubuntu-latest
    needs: [setup, smoke_test_lint, smoke_test, publish_images]
    steps:
      - uses: aws-actions/configure-aws-credentials@v5
        if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
        with:
          aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SQS_ACCESS_KEY }}
          aws-region: us-west-2
      - uses: isbang/sqs-send-action@… # action name reconstructed from its inputs; version pin obscured in the source
        if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
        with:
          sqs-url: ${{ secrets.DATAHUB_HEAD_SYNC_QUEUE }}
          message: '{ "command": "git-sync", "args" : {"repoName": "${{ needs.setup.outputs.repository_name }}", "repoOrg": "${{ github.repository_owner }}", "repoBranch": "${{ needs.setup.outputs.branch_name }}", "repoShaShort": "${{ needs.setup.outputs.short_sha }}" }}'