# Workflow identity and triggers for the GPU end-to-end tests.
name: GPU E2E Test

on:
  # Pull-request activity: creation, re-opening, new commits, and label changes.
  pull_request:
    types:
      - opened
      - reopened
      - synchronize
      - labeled
  # The same workflow also reacts to label additions via pull_request_target.
  pull_request_target:
    types:
      - labeled
78
89jobs :
# GPU end-to-end test job. This span is a rendered diff: rows marked '-' are the
# previous definition (job `e2e-test`), rows marked '+' are the replacement
# (job `gpu-e2e-test`). Parts of the job body are elided by the diff view.
9- e2e-test :
10- if : contains(github.event.pull_request.labels.*.name, 'ok-to-test-gpu-runner')
11- name : E2E Test
10+ gpu-e2e-test :
# Gate: run only when the PR carries the ok-to-test-gpu-runner label, or when a
# pull_request_target event reports that exact label being added.
# NOTE(review): join() produces a string, so contains() below is a substring
# match; a label such as `not-ok-to-test-gpu-runner` would also satisfy it.
# The removed condition passed the label array to contains(), which matches
# whole label names only - confirm the looser match is intended.
11+ if : |
12+ contains(join(github.event.pull_request.labels.*.name, ','), 'ok-to-test-gpu-runner') ||
13+ (github.event_name == 'pull_request_target' && github.event.label && github.event.label.name == 'ok-to-test-gpu-runner')
14+ name : GPU E2E Test
1215 runs-on : oracle-vm-16cpu-a10gpu-240gb
1316
1417 env :
# Single-entry matrix over the Kubernetes version; fail-fast is disabled.
2023 strategy :
2124 fail-fast : false
2225 matrix :
23- # Kubernetes versions for e2e tests on Kind cluster.
2426 kubernetes-version : ["1.33.1"]
2527
2628 steps :
4143
# Installs pinned Papermill/Jupyter/ipykernel releases, then the Kubeflow SDK
# straight from the `main` branch of its repository (not pinned to a revision).
4244 - name : Install dependencies
4345 run : |
44- echo "Install Papermill"
4546 pip install papermill==2.6.0 jupyter==1.1.1 ipykernel==6.29.5
46-
47- echo "Install Kubeflow SDK"
4847 pip install git+https://github.com/kubeflow/sdk.git@main
4948
5049 - name : Setup cluster
@@ -65,11 +64,77 @@ jobs:
6564 run : |
6665 make test-e2e-delete-gpu-cluster
6766
68- # TODO (andreyvelich): Discuss how we can upload artifacts for multiple Notebooks.
# Uploads the artifacts directory even when earlier steps failed (`always()`);
# the artifact is named after the matrix Kubernetes version and kept for 1 day.
6967 - name : Upload Artifacts to GitHub
7068 uses : actions/upload-artifact@v4
7169 if : always()
7270 with :
7371 name : ${{ matrix.kubernetes-version }}
7472 path : ${{ env.GOPATH }}/src/github.com/kubeflow/trainer/artifacts/*
7573 retention-days : 1
74+
# Reminds maintainers to add the `ok-to-test-gpu-runner` label when a PR touches
# the LLM blueprint examples, the label is absent, and no bot comment mentioning
# the label exists yet.
comment-if-missing-label:
  if: >-
    github.event_name == 'pull_request' ||
    (github.event_name == 'pull_request_target' && github.actor != 'github-actions[bot]')
  runs-on: ubuntu-latest

  # Least privilege: read the repository, read/write PR labels and comments.
  # NOTE(review): on `pull_request` events from forks GitHub issues a read-only
  # token regardless of this block, so the comment step cannot post there -
  # confirm whether fork PRs need to be covered by `pull_request_target`.
  permissions:
    contents: read
    issues: write
    pull-requests: write

  steps:
    - name: Checkout PR branch
      uses: actions/checkout@v4
      with:
        ref: ${{ github.event.pull_request.head.sha }}
        # Full history is required to compute the merge base below.
        fetch-depth: 0

    - name: Detect changes in LLM blueprint directories
      id: changes
      # SHAs are passed through the environment rather than interpolated into
      # the script text, so the shell only ever sees them as quoted variables.
      env:
        BASE_SHA: ${{ github.event.pull_request.base.sha }}
        HEAD_SHA: ${{ github.event.pull_request.head.sha }}
      run: |
        # No --depth here: a shallow fetch would cut the history that the
        # merge-base lookup of the three-dot diff depends on.
        git fetch origin "$BASE_SHA"
        git fetch origin "$HEAD_SHA"

        # Three-dot diff compares the head against the merge base, so files
        # that changed only on the base branch are not reported as PR changes.
        if git diff --name-only "$BASE_SHA...$HEAD_SHA" \
          | grep -E '^examples/(deepspeed|torchtune)/' >/dev/null; then
          echo "found_changes=true" >> "$GITHUB_OUTPUT"
        else
          echo "found_changes=false" >> "$GITHUB_OUTPUT"
        fi

    - name: Check for ok-to-test-gpu-runner label
      id: label-check
      uses: actions/github-script@v7
      with:
        script: |
          // Paginate so labels beyond the first API page are not missed.
          const labels = await github.paginate(github.rest.issues.listLabelsOnIssue, {
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: context.issue.number,
            per_page: 100,
          });
          const hasLabel = labels.some(l => l.name === 'ok-to-test-gpu-runner');
          core.setOutput('has_label', hasLabel ? 'true' : 'false');

    - name: Check if comment already exists
      id: comment-exists
      uses: actions/github-script@v7
      with:
        script: |
          // Paginate so an earlier bot comment is still found on long threads;
          // otherwise the reminder would be posted again.
          const comments = await github.paginate(github.rest.issues.listComments, {
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: context.issue.number,
            per_page: 100,
          });
          // `user` and `body` can be null in API responses; treat those
          // comments as non-matching instead of throwing.
          const botCommentExists = comments.some(c =>
            c.user?.type === 'Bot' &&
            (c.body ?? '').includes('ok-to-test-gpu-runner')
          );
          core.setOutput('exists', botCommentExists ? 'true' : 'false');

    - name: Post comment requesting label
      if: >-
        steps.changes.outputs.found_changes == 'true' &&
        steps.label-check.outputs.has_label == 'false' &&
        steps.comment-exists.outputs.exists == 'false'
      uses: actions/github-script@v7
      with:
        script: |
          // Built from an array so script indentation never leaks into the
          // rendered Markdown.
          const body = [
            'Hi @andreyvelich @varodrig @jaiakash 👋,',
            'Changes were detected in the LLM blueprint directories (`examples/deepspeed` or `examples/torchtune`).',
            'To run the test on GPU self runner, please add the **ok-to-test-gpu-runner** label to this PR.',
          ].join('\n');
          await github.rest.issues.createComment({
            owner: context.repo.owner,
            repo: context.repo.repo,
            issue_number: context.issue.number,
            body,
          });
0 commit comments