Skip to content

Commit 9c766f5

Browse files
committed
Integrate MaxText CI with Codecov
Integrates Codecov using a two-flag scheme (regular, scheduled) and carryforward logic to accurately track coverage across tiered test suites. Adds codecov.yml to enable carryforward for tests skipped in PRs (scheduled_only). Updates test workflows to generate coverage reports via pytest-cov and upload results with conditional flags. Sets Project coverage to track the full scheduled baseline and Patch coverage to evaluate new code against regular PR tests.
1 parent 4e927f5 commit 9c766f5

File tree

4 files changed

+103
-4
lines changed

4 files changed

+103
-4
lines changed

.github/workflows/codecov.yml

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Copyright 2023–2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# MaxText Codecov Configuration
16+
#
17+
# We use a two-flag scheme ('regular' and 'scheduled') to handle our tiered test suite.
18+
# 'carryforward' is enabled because Pull Requests only run a subset of tests (excluding 'scheduled_only').
19+
# Without it, PRs would show a significant coverage drop as they would 'overwrite' the full-suite results.
20+
#
21+
# Scheme:
22+
# - 'regular': Updated by every PR/Schedule. Used to evaluate 'patch' (new code) coverage.
23+
# - 'scheduled': Updated ONLY by scheduled full runs. Used to anchor 'project' (total health) coverage.
24+
# During PRs, the 'scheduled' flag is carried forward from the last full run on 'main' to keep the score stable.
25+
26+
# Exclude non-source code, deprecated and experimental folders from coverage tracking
27+
ignore:
28+
- "src/MaxText/assets"
29+
- "src/MaxText/configs"
30+
- "src/MaxText/examples"
31+
- "src/MaxText/experimental"
32+
- "src/MaxText/inference"
33+
- "src/MaxText/inference_mlperf"
34+
- "src/MaxText/scratch_code"
35+
- "src/MaxText/test_assets"
36+
37+
38+
flags:
39+
# Updated on every PR and during every scheduled run (contains a subset of tests).
40+
regular:
41+
carryforward: true
42+
# Updated ONLY during scheduled runs (contains all tests).
43+
scheduled:
44+
carryforward: true
45+
46+
coverage:
47+
status:
48+
# Project score remains stable at the 'Full Suite' level.
49+
# It carries forward the last 'scheduled' results during PRs.
50+
project:
51+
default:
52+
target: auto
53+
threshold: 5% # fail on 5+ percent degradation
54+
flags:
55+
- scheduled
56+
57+
# Patch score provides feedback on the code changed in a PR.
58+
patch:
59+
default:
60+
target: auto
61+
threshold: 5% # fail on 5+ percent degradation
62+
flags:
63+
- regular
64+

.github/workflows/run_pathways_tests.yml

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ jobs:
7979
source .venv/bin/activate
8080
maxtext_wheel=$(ls maxtext-*-py3-none-any.whl 2>/dev/null)
8181
uv pip install ${maxtext_wheel}[tpu] --resolution=lowest
82+
uv pip install pytest-cov
8283
uv pip install -r src/install_maxtext_extra_deps/extra_deps_from_github.txt
8384
python3 --version
8485
python3 -m pip freeze
@@ -96,8 +97,21 @@ jobs:
9697
export MAXTEXT_TEST_ASSETS_ROOT=$(pwd)/src/MaxText/test_assets
9798
export MAXTEXT_PKG_DIR=$(pwd)/src/MaxText
9899
# TODO(b/454659463): Enable test_default_hlo_match after volume mount is supported.
99-
.venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" -k "not AotHloIdenticalTest and not CompileThenLoad" --durations=0
100-
100+
.venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} \
101+
-v \
102+
-m "${FINAL_PYTEST_MARKER}" \
103+
-k "not AotHloIdenticalTest and not CompileThenLoad" \
104+
--durations=0 \
105+
--cov=src/MaxText \
106+
--cov-report=xml
107+
- name: Upload results to Codecov
108+
uses: codecov/codecov-action@v5
109+
continue-on-error: true
110+
with:
111+
token: ${{ secrets.CODECOV_TOKEN }}
112+
# If scheduled, upload to BOTH flags. If PR, upload ONLY to regular.
113+
flags: ${{ inputs.is_scheduled_run == 'true' && 'regular,scheduled' || 'regular' }}
114+
101115
services:
102116
resource_manager:
103117
image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest

.github/workflows/run_tests_against_package.yml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ jobs:
8888
uv pip install -r src/install_maxtext_extra_deps/extra_deps_from_github.txt
8989
python3 --version
9090
python3 -m pip freeze
91+
uv pip install pytest-cov
9192
- name: Copy test assets files
9293
run : gcloud storage cp gs://maxtext-test-assets/* src/MaxText/test_assets
9394
- name: Run Tests
@@ -107,6 +108,25 @@ jobs:
107108
if [ "${{ inputs.device_type }}" != "cuda12" ]; then
108109
export LIBTPU_INIT_ARGS='--xla_tpu_scoped_vmem_limit_kib=65536'
109110
fi
111+
if [ "${{ inputs.total_workers }}" -gt 1 ]; then
112+
.venv/bin/python3 -m pip install --quiet pytest-split
113+
SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}"
114+
else
115+
SPLIT_ARGS=""
116+
fi
110117
# TODO: Fix the skipped tests and remove the deselect flags
111-
[ "${{ inputs.total_workers }}" -gt 1 ] && .venv/bin/python3 -m pip install --quiet pytest-split && SPLIT_ARGS="--splits ${{ inputs.total_workers }} --group ${{ inputs.worker_group }}" || SPLIT_ARGS=""
112-
.venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} -v -m "${FINAL_PYTEST_MARKER}" --durations=0 --deselect "tests/tokenizer_test.py::TokenizerTest::test_detokenize" $SPLIT_ARGS
118+
.venv/bin/python3 -m pytest ${{ inputs.pytest_addopts }} \
119+
-v \
120+
-m "${FINAL_PYTEST_MARKER}" \
121+
--durations=0 \
122+
--deselect "tests/tokenizer_test.py::TokenizerTest::test_detokenize" \
123+
--cov=src/MaxText \
124+
--cov-report=xml \
125+
$SPLIT_ARGS
126+
- name: Upload results to Codecov
127+
uses: codecov/codecov-action@v5
128+
continue-on-error: true
129+
with:
130+
token: ${{ secrets.CODECOV_TOKEN }}
131+
# If scheduled, upload to BOTH flags. If PR, upload ONLY to regular.
132+
flags: ${{ inputs.is_scheduled_run == 'true' && 'regular,scheduled' || 'regular' }}

tests/integration_tests/checkpoint_compatibility_test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def run_checkpoint_compatibility(hardware, attention_type):
8282

8383
@pytest.mark.integration_test
8484
@pytest.mark.tpu_only
85+
@pytest.mark.skip(reason="Flaky test b/470704234")
8586
def test_autoselected_attention():
8687
run_checkpoint_compatibility("tpu", "autoselected")
8788

0 commit comments

Comments
 (0)