-
Notifications
You must be signed in to change notification settings - Fork 198
123 lines (103 loc) · 4.89 KB
/
Copy pathauto-retry-flaky-tests.yml
File metadata and controls
123 lines (103 loc) · 4.89 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# Auto-retry flaky CI: once the watched workflow run completes, the job in
# this file decides whether a failure looks flaky and, if so, re-runs it.
# Adapted from:
# https://github.com/anthropics/claude-code-action/blob/main/examples/test-failure-analysis.yml
name: Auto-Retry Flaky Tests

on:
  # Fires whenever a run of the listed workflow reaches the "completed" state.
  workflow_run:
    workflows:
      - "Flow build and test"
    types:
      - completed
# Token scopes granted to the jobs in this workflow.
permissions:
  actions: write  # the retry step calls `gh run rerun`
  contents: read  # the job checks out the repository
  # NOTE(review): presumably needed by the Claude action for an OIDC token
  # exchange - confirm against the action's documentation.
  id-token: write
  pull-requests: write  # the last step calls `gh pr comment`
jobs:
  # Asks Claude whether the failed CI run looks flaky, re-runs the failed jobs
  # when the verdict is "flaky" with confidence >= 0.7, and reports the verdict
  # on the pull request when the failed run was a pull_request build.
  detect-flaky:
    # Only react to failed runs whose head repository is this repository, and
    # stop once the run has been attempted more than five times so a
    # persistent failure cannot be retried forever.
    if: |
      github.event.workflow_run.conclusion == 'failure' &&
      github.event.workflow_run.head_repository.full_name == github.repository &&
      github.event.workflow_run.run_attempt <= 5
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10  # v6

      # Produces `structured_output`: a JSON object with is_flaky, confidence
      # and summary, as constrained by the --json-schema argument below.
      - name: Detect flaky test failures
        id: detect
        uses: anthropics/claude-code-action@a92e7c70a4da9793dc164451d829089dc057a464  # main
        with:
          allowed_bots: 'renovate'
          anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }}
          additional_permissions: |
            actions: read
          prompt: |
            The CI workflow failed: ${{ github.event.workflow_run.html_url }}
            Check the logs: gh run view ${{ github.event.workflow_run.id }} --log-failed
            Determine if this looks like a flaky test failure by checking for:
            - Timeout errors
            - Race conditions
            - Network errors
            - "Expected X but got Y" intermittent failures
            - Tests that passed in previous commits
            Return:
            - is_flaky: true if likely flaky, false if real bug
            - confidence: number 0-1 indicating confidence level
            - summary: brief one-sentence explanation
          claude_args: |
            --allowedTools 'Bash(gh run view *)' --json-schema '{"type":"object","properties":{"is_flaky":{"type":"boolean","description":"Whether this appears to be a flaky test failure"},"confidence":{"type":"number","minimum":0,"maximum":1,"description":"Confidence level in the determination"},"summary":{"type":"string","description":"One-sentence explanation of the failure"}},"required":["is_flaky","confidence","summary"]}'

      # Auto-retry only if flaky AND high confidence (>= 0.7).
      # The non-empty check comes first so fromJSON is never handed an empty
      # string when the detect step produced no structured output.
      - name: Retry flaky tests
        if: |
          steps.detect.outputs.structured_output != '' &&
          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
          fromJSON(steps.detect.outputs.structured_output).confidence >= 0.7
        env:
          GH_TOKEN: ${{ github.token }}
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
          # Event data reaches the script through the environment rather than
          # being spliced into the script text.
          RUN_ID: ${{ github.event.workflow_run.id }}
        run: |
          CONFIDENCE=$(printf '%s\n' "$OUTPUT" | jq -r '.confidence')
          SUMMARY=$(printf '%s\n' "$OUTPUT" | jq -r '.summary')
          echo "🔄 Flaky test detected (confidence: $CONFIDENCE)"
          echo "Summary: $SUMMARY"
          echo ""
          echo "Triggering automatic retry..."
          gh run rerun "$RUN_ID" --failed

      # Low confidence flaky detection - skip retry, only log the verdict.
      - name: Low confidence detection
        if: |
          steps.detect.outputs.structured_output != '' &&
          fromJSON(steps.detect.outputs.structured_output).is_flaky == true &&
          fromJSON(steps.detect.outputs.structured_output).confidence < 0.7
        env:
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
        run: |
          CONFIDENCE=$(printf '%s\n' "$OUTPUT" | jq -r '.confidence')
          echo "⚠️ Possible flaky test but confidence too low ($CONFIDENCE)"
          echo "Not retrying automatically - manual review recommended"

      # Comment on PR if this was a PR build. Skipped when there is no
      # structured output, so an empty result is never reported as a real bug.
      - name: Comment on PR
        if: |
          github.event.workflow_run.event == 'pull_request' &&
          steps.detect.outputs.structured_output != ''
        env:
          GH_TOKEN: ${{ github.token }}
          OUTPUT: ${{ steps.detect.outputs.structured_output }}
          # A branch name is user-controlled text: pass it (and the run URL)
          # as environment variables so it can never be parsed as shell code.
          HEAD_BRANCH: ${{ github.event.workflow_run.head_branch }}
          RUN_URL: ${{ github.event.workflow_run.html_url }}
        run: |
          IS_FLAKY=$(printf '%s\n' "$OUTPUT" | jq -r '.is_flaky')
          CONFIDENCE=$(printf '%s\n' "$OUTPUT" | jq -r '.confidence')
          SUMMARY=$(printf '%s\n' "$OUTPUT" | jq -r '.summary')
          # Let jq do the numeric comparison (same 0.7 threshold as the step
          # conditions above) so no extra calculator tool is required.
          HIGH_CONFIDENCE=$(printf '%s\n' "$OUTPUT" | jq -r '.confidence >= 0.7')
          pr_number=$(gh pr list --head "$HEAD_BRANCH" --json number --jq '.[0].number')
          if [ -n "$pr_number" ]; then
            if [ "$IS_FLAKY" = "true" ] && [ "$HIGH_CONFIDENCE" = "true" ]; then
              TITLE="🔄 Flaky Test Detected"
              ACTION="✅ Automatically retrying the workflow"
            elif [ "$IS_FLAKY" = "true" ]; then
              TITLE="🔄 Possible Flaky Test"
              ACTION="⚠️ Confidence too low ($CONFIDENCE) to retry automatically - manual review recommended"
            else
              TITLE="❌ Test Failure"
              ACTION="⚠️ This appears to be a real bug - manual intervention needed"
            fi
            # Build the comment with printf instead of a heredoc: a heredoc
            # terminator must sit at an exact column inside this YAML block.
            BODY=$(printf '## %s\n**Analysis**: %s\n**Confidence**: %s\n%s\n[View workflow run](%s)' \
              "$TITLE" "$SUMMARY" "$CONFIDENCE" "$ACTION" "$RUN_URL")
            gh pr comment "$pr_number" --body "$BODY"
          fi