feat(bigquery): Add pushdown_deny_usernames and pushdown_allow_usernames for server-side user filtering #48113
Workflow file for this run
name: Docker Build, Scan, Test
on:
  workflow_dispatch:
    inputs:
      profileName:
        description: "Profile name for the smoke-test. Defaults to quickstart-consumers if not specified"
        required: false
        default: "quickstart-consumers"
        type: string
  push:
    branches:
      - master
      - releases/**
  pull_request:
    types: [opened, synchronize, reopened, labeled]
    branches:
      - "**"
  release:
    types: [published]
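# To run this workflow manually with a custom quickstart profile (illustrative gh CLI invocation):
#   gh workflow run "Docker Build, Scan, Test" -f profileName=quickstart-consumers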
concurrency:
  # Use `github.run_id` (a unique value) instead of `github.ref` so that in-progress runs are
  # cancelled only for PRs, never on master -- keeping master runs makes reproducing issues easier.
  # `github.event.action == 'labeled'` is part of the group to differentiate runs triggered by
  # adding a label -- most labels are no-ops except for `depot`.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.run_id }}-${{ github.event.action == 'labeled' }}
  cancel-in-progress: true
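  # Example group value for a PR: "Docker Build, Scan, Test-1234-false". On master, run_id makes
  # every group unique, so in-progress master runs are never cancelled.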
env:
  DOCKER_REGISTRY: "acryldata"
  PROFILE_NAME: "${{ github.event.inputs.profileName || 'quickstart-consumers' }}"
  DOCKER_CACHE: "DEPOT"
  DEPOT_PROJECT_ID: "s0gr1cr3jd"
  HAS_DEPOT_LABEL: ${{ github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'depot') }}
  # Include Alpine variants for releases, or when the PR has the 'build-alpine-variant' label
  INCLUDE_ALPINE_VARIANTS: ${{ github.event_name == 'release' || (github.event_name == 'pull_request' && contains(github.event.pull_request.labels.*.name, 'build-alpine-variant')) }}
  IS_FORK: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.repository }}
  DEPOT_TOKEN: "${{ secrets.DEPOT_TOKEN }}"
permissions:
  contents: read
  id-token: write
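# `id-token: write` lets jobs request GitHub's OIDC token for keyless authentication to external
# services (presumably depot here, which supports OIDC trust relationships from GitHub Actions).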
jobs:
  setup:
    runs-on: depot-ubuntu-24.04-small
    if: ${{ github.event_name != 'pull_request' || github.event.action != 'labeled' || github.event.label.name == 'depot' }}
    outputs:
      # TODO: Many of the vars below should not be required anymore.
      tag: ${{ steps.tag.outputs.tag }}
      slim_tag: ${{ steps.tag.outputs.slim_tag }}
      full_tag: ${{ steps.tag.outputs.full_tag }}
      short_sha: ${{ steps.tag.outputs.short_sha }} # needed for auto-deploy
      unique_tag: ${{ steps.tag.outputs.unique_tag }}
      unique_slim_tag: ${{ steps.tag.outputs.unique_slim_tag }}
      unique_full_tag: ${{ steps.tag.outputs.unique_full_tag }}
      docker-login: ${{ steps.docker-login.outputs.docker-login }}
      publish: ${{ steps.publish.outputs.publish }}
      pr-publish: ${{ steps.pr-publish.outputs.publish }}
      python_release_version: ${{ steps.tag.outputs.python_release_version }}
      branch_name: ${{ steps.tag.outputs.branch_name }}
      repository_name: ${{ steps.tag.outputs.repository_name }}
      frontend_change: ${{ steps.ci-optimize.outputs.frontend-change == 'true' || github.event_name != 'pull_request' }}
      actions_change: ${{ steps.ci-optimize.outputs.actions-change == 'true' || github.event_name != 'pull_request' }}
      ingestion_change: ${{ steps.ci-optimize.outputs.ingestion-change == 'true' || github.event_name != 'pull_request' }}
      ingestion_base_change: ${{ steps.ci-optimize.outputs.ingestion-base-change == 'true' }}
      backend_change: ${{ steps.ci-optimize.outputs.backend-change == 'true' || github.event_name != 'pull_request' }}
      frontend_only: ${{ steps.ci-optimize.outputs.frontend-only == 'true' }}
      ingestion_only: ${{ steps.ci-optimize.outputs.ingestion-only == 'true' }}
      backend_only: ${{ steps.ci-optimize.outputs.backend-only == 'true' }}
      kafka_setup_change: ${{ steps.ci-optimize.outputs.kafka-setup-change == 'true' }}
      mysql_setup_change: ${{ steps.ci-optimize.outputs.mysql-setup-change == 'true' }}
      postgres_setup_change: ${{ steps.ci-optimize.outputs.postgres-setup-change == 'true' }}
      elasticsearch_setup_change: ${{ steps.ci-optimize.outputs.elasticsearch-setup-change == 'true' }}
      smoke_test_change: ${{ steps.ci-optimize.outputs.smoke-test-change == 'true' }}
      java_client_change: ${{ steps.ci-optimize.outputs.java-client-change == 'true' || github.event_name != 'pull_request' }}
      integrations_service_change: "false"
      datahub_executor_change: "false"
      build_runner_type: ${{ steps.set-runner.outputs.build_runner_type }}
      test_runner_type: ${{ steps.set-runner.outputs.test_runner_type }}
      test_runner_type_small: ${{ steps.set-runner.outputs.test_runner_type_small }}
      use_depot_cache: ${{ steps.set-runner.outputs.use_depot_cache }}
      uv_cache_key: ${{ steps.uv-cache-key.outputs.uv_cache_key }}
      uv_cache_key_prefix: ${{ steps.uv-cache-key.outputs.uv_cache_key_prefix }}
      yarn_cache_key: ${{ steps.yarn-cache-key.outputs.yarn_cache_key }}
      yarn_cache_key_prefix: ${{ steps.yarn-cache-key.outputs.yarn_cache_key_prefix }}
    steps:
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
      - name: Compute Tag
        id: tag
        env:
          GITHUB_REF_FALLBACK: ${{ github.event_name == 'release' && format('refs/tags/{0}', github.event.release.tag_name) || github.ref }}
          GITHUB_EVENT_NAME: ${{ github.event_name }}
        run: |
          source .github/scripts/docker_helpers.sh
          {
            echo "short_sha=${SHORT_SHA}"
            echo "tag=$(get_tag)"
            echo "slim_tag=$(get_tag_slim)"
            echo "full_tag=$(get_tag_full)"
            echo "unique_tag=$(get_unique_tag)"
            echo "unique_slim_tag=$(get_unique_tag_slim)"
            echo "unique_full_tag=$(get_unique_tag_full)"
            echo "python_release_version=$(get_python_docker_release_v)"
            echo "branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}"
            echo "repository_name=${GITHUB_REPOSITORY#*/}"
          } >> "$GITHUB_OUTPUT"
      - name: Check whether docker login is possible
        id: docker-login
        env:
          ENABLE_DOCKER_LOGIN: ${{ secrets.ACRYL_DOCKER_PASSWORD != '' }}
        run: |
          echo "Enable Docker Login: ${{ env.ENABLE_DOCKER_LOGIN }}"
          echo "docker-login=${{ env.ENABLE_DOCKER_LOGIN }}" >> "$GITHUB_OUTPUT"
      - name: Check whether publishing enabled
        id: publish
        env:
          ENABLE_PUBLISH: >-
            ${{
              (github.event_name == 'release' || ((github.event_name == 'workflow_dispatch' || github.event_name == 'push') && github.ref == 'refs/heads/master'))
              && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
            }}
        run: |
          echo "Enable publish: ${{ env.ENABLE_PUBLISH }}"
          echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
      - name: Check whether PR publishing enabled
        id: pr-publish
        env:
          ENABLE_PUBLISH: >-
            ${{
              (github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'publish') || contains(github.event.pull_request.labels.*.name, 'publish-docker')))
              && ( secrets.ACRYL_DOCKER_PASSWORD != '' )
            }}
        run: |
          echo "Enable PR publish: ${{ env.ENABLE_PUBLISH }}"
          echo "publish=${{ env.ENABLE_PUBLISH }}" >> "$GITHUB_OUTPUT"
      - uses: ./.github/actions/ci-optimization
        id: ci-optimize
      - name: Determine runner type
        id: set-runner
        # This needs to handle two scenarios:
        # 1. Running on a PR from a fork. We use GitHub runners, unless the "depot" label exists -- in
        #    which case we run it on depot. Note: concurrency is lower when using GitHub runners, queue
        #    times can be longer, and test time is longer due to fewer parallel jobs.
        # 2. Running on a PR from a branch in the datahub-project org, and push/schedule events on master.
        #    Depot is used here for remote container builds in base_build and also for all runners. Depot
        #    runners support unlimited concurrency, and hence short queue times and higher parallelism of
        #    smoke tests.
        run: |
          if [[ "${{ env.DOCKER_CACHE }}" == "DEPOT" && "${{ env.IS_FORK }}" == "false" ]]; then
            echo "build_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            echo "test_runner_type_small=depot-ubuntu-24.04-small" >> "$GITHUB_OUTPUT"
            echo "use_depot_cache=true" >> "$GITHUB_OUTPUT"
          else
            echo "build_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
            if [[ "${{ env.HAS_DEPOT_LABEL }}" == "true" ]]; then
              echo "test_runner_type=depot-ubuntu-24.04-4" >> "$GITHUB_OUTPUT"
            else
              echo "test_runner_type=ubuntu-latest" >> "$GITHUB_OUTPUT"
            fi
            echo "test_runner_type_small=ubuntu-latest" >> "$GITHUB_OUTPUT"
            echo "use_depot_cache=false" >> "$GITHUB_OUTPUT"
            # publishing is currently only supported via depot
          fi
      - name: Compute UV Cache Key
        id: uv-cache-key
        run: |
          echo "uv_cache_key=docker-unified-${{ runner.os }}-uv-${{ hashFiles(
            './datahub-actions/pyproject.toml',
            './datahub-actions/setup.py',
            './smoke-test/requirements.txt',
            './smoke-test/pyproject.toml',
            './metadata-ingestion/pyproject.toml',
            './metadata-ingestion/setup.py') }}" >> "$GITHUB_OUTPUT"
          echo "uv_cache_key_prefix=docker-unified-${{ runner.os }}-uv-" >> "$GITHUB_OUTPUT"
      - name: Compute Yarn Cache Key
        id: yarn-cache-key
        run: |
          echo "yarn_cache_key=docker-unified-${{ runner.os }}-yarn-${{ hashFiles('./smoke-test/tests/cypress/yarn.lock', './datahub-web-react/yarn.lock') }}" >> "$GITHUB_OUTPUT"
          echo "yarn_cache_key_prefix=docker-unified-${{ runner.os }}-yarn-" >> "$GITHUB_OUTPUT"
  smoke_test_lint:
    name: Lint on smoke tests
    runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
    needs: setup
    if: ${{ needs.setup.outputs.smoke_test_change == 'true' }}
    steps:
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.uv_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.yarn_cache_key_prefix }}
      - name: Run lint on smoke test
        run: |
          python ./.github/scripts/check_python_package.py
          ./gradlew :smoke-test:pythonLint
          ./gradlew :smoke-test:cypressLint
  base_build:
    name: Build all images
    runs-on: ${{ needs.setup.outputs.build_runner_type }}
    needs: setup
    if: ${{ needs.setup.outputs.use_depot_cache == 'true' }} # On fork, the smoke test job does the build since the depot cache is not available
    outputs:
      build_id: ${{ steps.capture-build-id.outputs.build_id }}
      matrix: ${{ steps.capture-build-id.outputs.matrix }}
    steps:
      - name: Set up JDK 17
        uses: actions/setup-java@v5
        with:
          distribution: "zulu"
          java-version: 17
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.uv_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.yarn_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.gradle/wrapper
            ~/.gradle/caches/modules-2
            ~/.gradle/caches/jars-*
            ~/.gradle/caches/transforms-*
          key: gradle-plugins-cache
          restore-keys: |
            gradle-plugins-cache
      - name: Set up Depot CLI
        if: ${{ env.DOCKER_CACHE == 'DEPOT' }}
        uses: depot/setup-action@v1
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
        with:
          checkout-head-only: false
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ needs.setup.outputs.docker-login == 'true' }}
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
      - name: Build all Images (For Smoke tests)
        if: ${{ needs.setup.outputs.publish != 'true' && needs.setup.outputs.pr-publish != 'true' }}
        # If not publishing, a subset of images required for smoke tests is sufficient.
        # Use buildImagesAll for workflow_dispatch, otherwise buildImagesQuickstart.
        run: |
          if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
            # If triggered via workflow_dispatch, this can run other quickstart variants, so let's build
            # all images to allow that. We still don't need matrixed builds since this is for smoke tests only.
            BUILD_TASK=":docker:buildImagesAll"
          else
            BUILD_TASK=":docker:buildImagesQuickstart"
          fi
          ./gradlew $BUILD_TASK -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PincludeAlpineVariants=${{ env.INCLUDE_ALPINE_VARIANTS }}
      - name: Build all Images (Publish)
        if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        # Since this is for publishing, build all images, not just those needed for smoke tests. Head
        # images and releases are published only if tests pass; pr-publish publishes images without
        # waiting for tests to pass.
        run: |
          ./gradlew :docker:buildImagesAll -PmatrixBuild=true -Ptag=${{ needs.setup.outputs.tag }} -PshaTag=${{ needs.setup.outputs.short_sha }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PdockerPush=${{ needs.setup.outputs.pr-publish }} -PincludeAlpineVariants=${{ env.INCLUDE_ALPINE_VARIANTS }}
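      # Note: -PdockerPush evaluates to true only for pr-publish, so only PR-labeled builds push inline
      # here; the tests-gated push for master/releases happens later in the publish_images job.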
      - name: Capture build Id
        id: capture-build-id
        run: |
          pip install jq
          DEPOT_BUILD_ID=$(jq -r '.["depot.build"]?.buildID' "${{ github.workspace }}/build/build-metadata.json")
          echo "build_id=${DEPOT_BUILD_ID}" >> "$GITHUB_OUTPUT"
          echo "matrix=$(jq -c '{"target":.["depot.build"].targets}' "${{ github.workspace }}/build/build-metadata.json")" >> "$GITHUB_OUTPUT"
      - name: Save build Metadata
        if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          name: build-metadata-${{ needs.setup.outputs.tag }}
          path: |
            ${{ github.workspace }}/build/build-metadata.json
            ${{ github.workspace }}/build/bake-spec-allImages.json
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' }}
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' }}
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' }}
        with:
          path: |
            ~/.gradle/wrapper
            ~/.gradle/caches/modules-2
            ~/.gradle/caches/jars-*
            ~/.gradle/caches/transforms-*
          key: gradle-plugins-cache
  scan_images:
    permissions:
      contents: read # for actions/checkout to fetch code
      security-events: write # for github/codeql-action/upload-sarif to upload SARIF results
      actions: read # only required for a private repository by github/codeql-action/upload-sarif to get the Action run status
    name: Scan images for vulnerabilities
    runs-on: depot-ubuntu-24.04
    needs: [setup, base_build]
    if: ${{ needs.setup.outputs.publish == 'true' }}
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.base_build.outputs.matrix) }}
    steps:
      - name: Checkout # adding checkout step just to make trivy upload happy
        uses: acryldata/sane-checkout-action@v4
      - id: download_image
        name: Download images from depot
        if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
        run: |
          depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}" --target "${{ matrix.target }}"
          docker images
          echo "docker_image=$(docker images --format '{{.Repository}}:{{.Tag}}' | grep "${{ needs.setup.outputs.tag }}")" >> "$GITHUB_OUTPUT"
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@… # version pin obscured in the source; restore the repo's pin here
        env:
          TRIVY_OFFLINE_SCAN: true
          TRIVY_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-db:2,ghcr.io/aquasecurity/trivy-db:2
          TRIVY_JAVA_DB_REPOSITORY: public.ecr.aws/aquasecurity/trivy-java-db:1,ghcr.io/aquasecurity/trivy-java-db:1
        with:
          image-ref: ${{ steps.download_image.outputs.docker_image }}
          format: "template"
          template: "@/contrib/sarif.tpl"
          output: "trivy-results.sarif"
          severity: "CRITICAL,HIGH"
          ignore-unfixed: true
          vuln-type: "os,library"
          trivy-config: "./trivy.yaml"
      - name: Upload Trivy scan results to GitHub Security tab
        uses: github/codeql-action/upload-sarif@v4
        with:
          sarif_file: "trivy-results.sarif"
  smoke_test_matrix:
    runs-on: ${{ needs.setup.outputs.test_runner_type_small }}
    needs: setup
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      cypress_batch_count: ${{ steps.set-batch-count.outputs.cypress_batch_count }}
      python_batch_count: ${{ steps.set-batch-count.outputs.python_batch_count }}
    steps:
      - id: set-batch-count
        # Tests are split simply to achieve the configured number of batches for parallelization. These
        # counts may need to be increased as newly added tests push the duration to the point where an
        # additional parallel batch helps.
        # python_batch_count splits the pytests in smoke-test (batches of actual test functions);
        # cypress_batch_count splits the collection of cypress test specs into batches.
        run: |
          if [[ "${{ env.IS_FORK }}" == "true" ]]; then
            echo "cypress_batch_count=5" >> "$GITHUB_OUTPUT"
            echo "python_batch_count=3" >> "$GITHUB_OUTPUT"
          else
            echo "cypress_batch_count=8" >> "$GITHUB_OUTPUT"
            echo "python_batch_count=7" >> "$GITHUB_OUTPUT"
          fi
      - id: set-matrix
        # For m python batches and n cypress batches, we need a test matrix of (python x m) + (cypress x n).
        # While GitHub Actions matrix generation can handle the two parts individually, there isn't a way
        # to use two generated matrices for the same job. So, produce the combined matrix with scripting
        # and use the include directive to add it to the test matrix.
        run: |
          python_batch_count=${{ steps.set-batch-count.outputs.python_batch_count }}
          python_matrix='{"test_strategy":"pytests","batch":"0","batch_count":"'"$python_batch_count"'"}'
          for ((i=1;i<python_batch_count;i++)); do
            python_matrix="$python_matrix"',{"test_strategy":"pytests","batch_count":"'"$python_batch_count"'","batch":"'"$i"'"}'
          done
          cypress_batch_count=${{ steps.set-batch-count.outputs.cypress_batch_count }}
          cypress_matrix='{"test_strategy":"cypress","batch":"0","batch_count":"'"$cypress_batch_count"'"}'
          for ((i=1;i<cypress_batch_count;i++)); do
            cypress_matrix="$cypress_matrix"',{"test_strategy":"cypress","batch_count":"'"$cypress_batch_count"'","batch":"'"$i"'"}'
          done
          includes=''
          if [[ "${{ needs.setup.outputs.backend_change }}" == 'true' || "${{ needs.setup.outputs.smoke_test_change }}" == 'true' || "${{ needs.setup.outputs.publish }}" == 'true' ]]; then
            includes="$python_matrix,$cypress_matrix"
          elif [[ "${{ needs.setup.outputs.frontend_only }}" == 'true' ]]; then
            includes="$cypress_matrix"
          elif [[ "${{ needs.setup.outputs.ingestion_only }}" == 'true' ]]; then
            includes="$python_matrix"
          fi
          # No space before the closing brace, so the downstream '{"include":[]}' comparison matches exactly.
          echo "matrix={\"include\":[$includes]}" >> "$GITHUB_OUTPUT"
  smoke_test:
    name: Run Smoke Tests (${{ matrix.test_strategy }}, Batch ${{ matrix.batch }}/${{ matrix.batch_count }})
    runs-on: ${{ needs.setup.outputs.test_runner_type }}
    needs: [setup, smoke_test_matrix, base_build]
    strategy:
      fail-fast: false
      matrix: ${{ fromJson(needs.smoke_test_matrix.outputs.matrix || '{"include":[]}') }}
    if: ${{ always() && !failure() && !cancelled() && needs.smoke_test_matrix.outputs.matrix != '' && needs.smoke_test_matrix.outputs.matrix != '{"include":[]}' }}
    env:
      # TODO Chakru: Review if required
      MIXPANEL_API_SECRET: ${{ secrets.MIXPANEL_API_SECRET }}
      MIXPANEL_PROJECT_ID: ${{ secrets.MIXPANEL_PROJECT_ID }}
    steps:
      - name: Free up disk space
        if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
        run: |
          sudo apt-get remove 'dotnet-*' azure-cli || true
          sudo rm -rf /usr/local/.ghcup || true
          sudo rm -rf /usr/share/dotnet || true
          sudo rm -rf /usr/share/swift || true
          sudo rm -rf /usr/local/julia* || true
          sudo rm -rf /usr/local/share/powershell || true
          sudo rm -rf /usr/share/miniconda || true
          sudo rm -rf /usr/local/lib/android/ || true
          sudo docker system prune -a -f || true
          df -h
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.uv_cache_key_prefix }}
      - uses: actions/cache/restore@v4
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
          restore-keys: |
            ${{ needs.setup.outputs.yarn_cache_key_prefix }}
      - name: Check out the repo
        uses: acryldata/sane-checkout-action@v4
        with:
          checkout-head-only: false
      - name: Set up Depot CLI
        if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
        uses: depot/setup-action@v1
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Detect workflow retry
        id: retry-detection
        run: |
          if [[ "${{ github.run_attempt }}" -gt 1 ]]; then
            echo "This is retry attempt ${{ github.run_attempt }}"
            echo "is_retry=true" >> "$GITHUB_OUTPUT"
            PREVIOUS_ATTEMPT=$(( ${{ github.run_attempt }} - 1 ))
            echo "previous_attempt=${PREVIOUS_ATTEMPT}" >> "$GITHUB_OUTPUT"
          else
            echo "This is the first attempt"
            echo "is_retry=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Download previous test results
        if: steps.retry-detection.outputs.is_retry == 'true'
        id: download-artifacts
        continue-on-error: true
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          set +e
          echo "Downloading artifacts from run ${{ github.run_id }}, attempt ${{ steps.retry-detection.outputs.previous_attempt }}"
          # Create directory for previous results
          mkdir -p "${{ github.workspace }}/previous-test-results"
          # Get the artifact ID for this batch's test results
          ARTIFACT_NAME="Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }}"
          echo "Looking for artifact: ${ARTIFACT_NAME}"
          # Query artifacts for this workflow run
          ARTIFACT_ID=$(gh api "repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/artifacts" \
            --jq ".artifacts[] | select(.name == \"${ARTIFACT_NAME}\") | .id" | head -1)
          if [[ -z "$ARTIFACT_ID" ]]; then
            echo "No artifact found for batch ${{ matrix.batch }}"
            echo "download_success=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          echo "Found artifact ID: ${ARTIFACT_ID}"
          # Download and extract the artifact
          cd "${{ github.workspace }}/previous-test-results"
          gh api "repos/${{ github.repository }}/actions/artifacts/${ARTIFACT_ID}/zip" > artifact.zip
          unzip -q artifact.zip
          # Verify we got XML files
          if [[ "${{ matrix.test_strategy }}" == "cypress" ]]; then
            # Cypress XMLs are in smoke-test/tests/cypress/build/smoke-test-results/
            if find . -path "*/smoke-test-results/cypress-test-*.xml" -print -quit | grep -q .; then
              echo "Successfully downloaded cypress test results"
              echo "download_success=true" >> "$GITHUB_OUTPUT"
            else
              echo "No cypress test XML files found in artifact"
              echo "download_success=false" >> "$GITHUB_OUTPUT"
            fi
          else
            # Pytest XMLs are in smoke-test/junit.*.xml
            if find . -path "*/junit*.xml" -print -quit | grep -q .; then
              echo "Successfully downloaded pytest test results"
              echo "download_success=true" >> "$GITHUB_OUTPUT"
            else
              echo "No pytest XML files found in artifact"
              echo "download_success=false" >> "$GITHUB_OUTPUT"
            fi
          fi
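      # The parse_failed_*.py helpers below signal via exit code (mirrored in the case statements):
      # 0 = failures found (retry just those), 2 = previous attempt fully passed (skip the batch),
      # 3 = no usable results, anything else = parse error (fall back to running all tests).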
      - name: Parse failed Cypress tests
        if: |
          steps.retry-detection.outputs.is_retry == 'true' &&
          matrix.test_strategy == 'cypress' &&
          steps.download-artifacts.outputs.download_success == 'true'
        id: parse-cypress-failures
        run: |
          set +e
          OUTPUT_FILE="${{ github.workspace }}/failed-tests-batch-${{ matrix.batch }}.txt"
          python3 .github/scripts/parse_failed_cypress_tests.py \
            --input-dir "${{ github.workspace }}/previous-test-results" \
            --output "${OUTPUT_FILE}"
          EXIT_CODE=$?
          case $EXIT_CODE in
            0)
              echo "parse_result=has_failures" >> "$GITHUB_OUTPUT"
              echo "filtered_tests_file=${OUTPUT_FILE}" >> "$GITHUB_OUTPUT"
              echo "Will retry $(wc -l < "${OUTPUT_FILE}") failed test(s)"
              ;;
            2)
              echo "parse_result=all_passed" >> "$GITHUB_OUTPUT"
              echo "All tests passed in previous attempt - will skip batch"
              ;;
            3)
              echo "parse_result=no_artifacts" >> "$GITHUB_OUTPUT"
              echo "No test results found - will run all tests"
              ;;
            *)
              echo "parse_result=error" >> "$GITHUB_OUTPUT"
              echo "Error parsing test results - will run all tests"
              ;;
          esac
      - name: Parse failed pytest modules
        if: |
          steps.retry-detection.outputs.is_retry == 'true' &&
          matrix.test_strategy == 'pytests' &&
          steps.download-artifacts.outputs.download_success == 'true'
        id: parse-pytest-failures
        run: |
          set +e
          OUTPUT_FILE="${{ github.workspace }}/failed-modules-batch-${{ matrix.batch }}.txt"
          python3 .github/scripts/parse_failed_pytest_tests.py \
            --input-dir "${{ github.workspace }}/previous-test-results" \
            --output "${OUTPUT_FILE}"
          EXIT_CODE=$?
          case $EXIT_CODE in
            0)
              echo "parse_result=has_failures" >> "$GITHUB_OUTPUT"
              echo "filtered_tests_file=${OUTPUT_FILE}" >> "$GITHUB_OUTPUT"
              echo "Will retry $(wc -l < "${OUTPUT_FILE}") failed module(s)"
              ;;
            2)
              echo "parse_result=all_passed" >> "$GITHUB_OUTPUT"
              echo "All tests passed in previous attempt - will skip batch"
              ;;
            3)
              echo "parse_result=no_artifacts" >> "$GITHUB_OUTPUT"
              echo "No test results found - will run all tests"
              ;;
            *)
              echo "parse_result=error" >> "$GITHUB_OUTPUT"
              echo "Error parsing test results - will run all tests"
              ;;
          esac
      - name: Clean up downloaded artifacts
        if: steps.retry-detection.outputs.is_retry == 'true'
        run: |
          # Cleaning up downloaded test results to prevent contamination of current run
          rm -rf "${{ github.workspace }}/previous-test-results"
      - name: Skip batch if all tests passed
        if: |
          steps.parse-cypress-failures.outputs.parse_result == 'all_passed' ||
          steps.parse-pytest-failures.outputs.parse_result == 'all_passed'
        run: |
          echo "✓ All tests passed in previous attempt for ${{ matrix.test_strategy }} batch ${{ matrix.batch }}"
          echo "Skipping this batch to optimize CI time (Docker images, quickstart, and tests)"
          exit 0
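      # The step above only logs the decision; the actual skip is enforced by the
      # parse_result != 'all_passed' guards on the "Smoke test" step below.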
      - uses: gradle/actions/setup-gradle@v4
        if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ needs.setup.outputs.docker-login == 'true' }}
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
      - name: Disk Space Analysis
        run: |
          echo "=== Disk Usage Overview ==="
          df -h
          echo -e "\n=== Docker Disk Usage ==="
          docker system df -v
      - name: build images
        if: ${{ needs.setup.outputs.use_depot_cache != 'true' }}
        run: |
          ./gradlew :docker:buildImagesQuickstartDebugConsumers -Ptag=${{ needs.setup.outputs.tag }} -PpythonDockerVersion=${{ needs.setup.outputs.python_release_version }} -PdockerRegistry=${{ env.DOCKER_REGISTRY }} -PincludeAlpineVariants=${{ env.INCLUDE_ALPINE_VARIANTS }}
          docker images
        env:
          DOCKER_CACHE: GITHUB
      - name: pull images from depot
        if: ${{ needs.setup.outputs.use_depot_cache == 'true' }}
        run: |
          depot pull --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}"
          docker images
      - name: Disk Space Analysis
        run: |
          echo "=== Disk Usage Overview ==="
          df -h
          echo -e "\n=== Docker Disk Usage ==="
          docker system df -v
      - name: run quickstart
        env:
          DATAHUB_TELEMETRY_ENABLED: false
          DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
          DATAHUB_ACTIONS_IMAGE: ${{ env.DATAHUB_ACTIONS_IMAGE }}
          ACTIONS_EXTRA_PACKAGES: "acryl-datahub-actions[executor] acryl-datahub-actions"
          ACTIONS_CONFIG: "https://raw.githubusercontent.com/acryldata/datahub-actions/main/docker/config/executor.yaml"
        run: |
          # Quickstart uses the PROFILE_NAME env var, if defined, to pick the profile to start. Defaults to quickstart-consumers.
          ./smoke-test/run-quickstart.sh
      - name: Disk Check
        run: df -h . && docker images
      - name: Disable ES Disk Threshold
        run: |
          curl -XPUT "http://localhost:9200/_cluster/settings" \
            -H 'Content-Type: application/json' -d '{
              "persistent": {
                "cluster": {
                  "routing": {
                    "allocation.disk.threshold_enabled": false
                  }
                }
              }
            }'
      - name: Install dependencies
        run: ./metadata-ingestion/scripts/install_deps.sh
      - name: Build datahub cli
        run: |
          ./gradlew :metadata-ingestion:install
      - name: Smoke test
        if: |
          steps.parse-cypress-failures.outputs.parse_result != 'all_passed' &&
          steps.parse-pytest-failures.outputs.parse_result != 'all_passed'
        env:
          RUN_QUICKSTART: false
          DATAHUB_VERSION: ${{ needs.setup.outputs.tag }}
          CYPRESS_RECORD_KEY: ${{ secrets.CYPRESS_RECORD_KEY }}
          CLEANUP_DATA: "false"
          TEST_STRATEGY: ${{ matrix.test_strategy }}
          BATCH_COUNT: ${{ matrix.batch_count }}
          BATCH_NUMBER: ${{ matrix.batch }}
          FILTERED_TESTS: ${{ steps.parse-cypress-failures.outputs.filtered_tests_file || steps.parse-pytest-failures.outputs.filtered_tests_file || '' }}
        run: |
          if [[ -n "$FILTERED_TESTS" && -f "$FILTERED_TESTS" ]]; then
            echo "=========================================="
            if [[ "${{ matrix.test_strategy }}" == "cypress" ]]; then
              echo "RETRY MODE: Running only failed Cypress tests"
            else
              echo "RETRY MODE: Running only failed pytest modules"
            fi
            echo "=========================================="
            echo "Failed items to retry:"
            cat "$FILTERED_TESTS"
            echo "=========================================="
          elif [[ "${{ steps.retry-detection.outputs.is_retry }}" == "true" ]]; then
            echo "RETRY MODE: Running all tests (fallback)"
          fi
          echo "$DATAHUB_VERSION"
          ./gradlew --stop
          ./smoke-test/smoke.sh
      - name: Java SDK V2 Integration Tests
        if: ${{ (needs.setup.outputs.backend_change == 'true' || needs.setup.outputs.java_client_change == 'true') && matrix.batch == '0' }}
        env:
          DATAHUB_SERVER: http://localhost:8080
          ADMIN_USERNAME: datahub
          ADMIN_PASSWORD: datahub
        run: |
          echo "Running Java SDK V2 integration tests against running DataHub instance..."
          ./gradlew :metadata-integration:java:datahub-client:test --tests "*Integration*"
      - name: Upload Java SDK V2 coverage to Codecov
        if: ${{ always() && (needs.setup.outputs.backend_change == 'true' || needs.setup.outputs.java_client_change == 'true') && matrix.batch == '0' }}
        uses: codecov/codecov-action@v5
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          directory: ./build/coverage-reports/metadata-integration/java/datahub-client/
          flags: metadata-integration
          name: java-sdk-v2-integration
          fail_ci_if_error: false
          verbose: true
          override_branch: ${{ github.head_ref || github.ref_name }}
      - name: Disk Check
        run: df -h . && docker images
      - name: store logs
        if: failure()
        run: |
          docker ps -a
          TEST_STRATEGY="-${{ matrix.test_strategy }}-${{ matrix.batch }}"
          source .github/scripts/docker_logs.sh
      - name: Upload logs
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: docker-logs-${{ matrix.test_strategy }}-${{ matrix.batch }}
          path: "docker_logs/*.log"
          retention-days: 5
      - name: Upload screenshots
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: cypress-snapshots-${{ matrix.test_strategy }}-${{ matrix.batch }}
          path: smoke-test/tests/cypress/cypress/screenshots/
      - uses: actions/upload-artifact@v4
        if: always()
        with:
          name: Test Results (smoke tests) ${{ matrix.test_strategy }} ${{ matrix.batch }}
          path: |
            **/build/reports/tests/test/**
            **/build/test-results/test/**
            **/smoke-test-results/cypress-test-*.xml
            **/junit.*.xml
            !**/binary/**
      - name: Send failed test metrics to PostHog
        if: failure()
        continue-on-error: true
        env:
          POSTHOG_API_KEY: ${{ secrets.POSTHOG_API_KEY }}
          POSTHOG_HOST: ${{ secrets.POSTHOG_HOST }}
        run: |
          if [ -z "$POSTHOG_API_KEY" ]; then
            echo "⚠️ POSTHOG_API_KEY not configured, skipping test failure metrics"
            exit 0
          fi
          TEMP_DIR=$(mktemp -d)
          mkdir -p "$TEMP_DIR/test-results"
          find . -name "*.xml" -path "*/build/test-results/*" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          find . -name "cypress-test-*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          find . -name "junit.*.xml" -exec cp {} "$TEMP_DIR/test-results/" \; 2>/dev/null || true
          python3 .github/scripts/send_failed_tests_to_posthog.py \
            --input-dir "$TEMP_DIR/test-results" \
            --posthog-api-key "$POSTHOG_API_KEY" \
            --posthog-host "${POSTHOG_HOST:-https://app.posthog.com}" \
            --repository "${{ github.repository }}" \
            --workflow-name "${{ github.workflow }}" \
            --branch "${{ github.head_ref || github.ref_name }}" \
            --run-id "${{ github.run_id }}" \
            --run-attempt "${{ github.run_attempt }}" \
            --batch "${{ matrix.batch }}" \
            --batch-count "${{ strategy.job-total }}" \
            --test-strategy "${{ matrix.test_strategy }}"
          rm -rf "$TEMP_DIR"
      - name: Upload test results to Codecov
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          override_branch: ${{ github.head_ref || github.ref_name }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
        # The cache does not need to be saved by all the parallel workers; its contents do not depend on the tests.
        with:
          path: |
            ~/.cache/uv
          key: ${{ needs.setup.outputs.uv_cache_key }}
      - uses: actions/cache/save@v4
        if: ${{ github.ref == 'refs/heads/master' && matrix.batch == '0' }}
        with:
          path: |
            ~/.cache/yarn
          key: ${{ needs.setup.outputs.yarn_cache_key }}
  publish_images:
    name: Push images after tests pass
    runs-on: ${{ needs.setup.outputs.test_runner_type_small || 'ubuntu-latest' }}
    needs: [setup, smoke_test, base_build]
    if: ${{ always() && !failure() && !cancelled() && needs.setup.result != 'skipped' }}
    steps:
      - name: Check if tests have passed
        id: tests_passed
        run: |
          # Check the overall result of the matrix job.
          # Matrix jobs can have mixed results, so we check for any failures.
          if [[ "${{ needs.smoke_test.result }}" == "failure" ]]; then
            echo "Smoke tests failed, skipping image pushing"
            echo "tests_passed=false" >> "$GITHUB_OUTPUT"
            exit 1
          elif [[ "${{ needs.smoke_test.result }}" == "cancelled" ]]; then
            echo "Smoke tests were cancelled, skipping image pushing"
            echo "tests_passed=false" >> "$GITHUB_OUTPUT"
            exit 1
          else
            echo "Smoke tests completed successfully, proceeding with image pushing"
            echo "tests_passed=true" >> "$GITHUB_OUTPUT"
          fi
      - name: Set up Depot CLI
        if: ${{ steps.tests_passed.outputs.tests_passed == 'true' && needs.setup.outputs.use_depot_cache == 'true' }}
        uses: depot/setup-action@v1
      - name: Login to DockerHub
        uses: docker/login-action@v3
        if: ${{ steps.tests_passed.outputs.tests_passed == 'true' && needs.setup.outputs.docker-login == 'true' }}
        with:
          username: ${{ secrets.ACRYL_DOCKER_USERNAME }}
          password: ${{ secrets.ACRYL_DOCKER_PASSWORD }}
      - name: Download build Metadata
        if: ${{ needs.setup.outputs.publish == 'true' || needs.setup.outputs.pr-publish == 'true' }}
        uses: actions/download-artifact@v6
        with:
          name: build-metadata-${{ needs.setup.outputs.tag }}
          path: ${{ github.workspace }}/build
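      # `depot bake --print` re-renders the bake spec saved by base_build without building anything; jq
      # flattens it into one {target, tag} pair per line so each image can be pushed from depot's build
      # cache under its final tag, with no rebuild.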
      - name: Push images from depot builder
        if: ${{ steps.tests_passed.outputs.tests_passed == 'true' && needs.setup.outputs.use_depot_cache == 'true' && needs.setup.outputs.publish == 'true' }}
        run: |
          set -euo pipefail
          depot bake -f "${{ github.workspace }}/build/bake-spec-allImages.json" --print \
            | jq -c '.target | to_entries | map({target: .key, tags: .value.tags[]})' \
            | jq -c '.[]' \
            | while IFS= read -r line; do
                TARGET=$(echo "$line" | jq -r '.target')
                TAG=$(echo "$line" | jq -r '.tags')
                depot push --project "${{ env.DEPOT_PROJECT_ID }}" "${{ needs.base_build.outputs.build_id }}" --target "$TARGET" --tag "$TAG"
              done
  deploy_datahub_head:
    name: Deploy to Datahub HEAD
    runs-on: ubuntu-latest
    needs: [setup, smoke_test_lint, smoke_test, publish_images]
    steps:
      - uses: aws-actions/configure-aws-credentials@v5
        if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
        with:
          aws-access-key-id: ${{ secrets.AWS_SQS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SQS_ACCESS_KEY }}
          aws-region: us-west-2
      - uses: isbang/sqs-send-action@… # action name reconstructed from its inputs; version pin obscured in the source
        if: ${{ needs.setup.outputs.publish != 'false' && github.repository_owner == 'datahub-project' && needs.setup.outputs.repository_name == 'datahub' }}
        with:
          sqs-url: ${{ secrets.DATAHUB_HEAD_SYNC_QUEUE }}
          message: '{ "command": "git-sync", "args" : {"repoName": "${{ needs.setup.outputs.repository_name }}", "repoOrg": "${{ github.repository_owner }}", "repoBranch": "${{ needs.setup.outputs.branch_name }}", "repoShaShort": "${{ needs.setup.outputs.short_sha }}" }}'