# Introducing Code Review category #2219
# This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# CI workflow.
#
# Job flow:
#   lint-and-test     - pre-commit hooks and pytest with coverage.
#   select-category   - picks one category at random from `bcbench category list`.
#   get-entries       - reusable workflow that returns the entries for that category.
#   mock-evaluation   - one matrix job per entry; runs `bcbench evaluate mock`
#                       and uploads the resulting *.jsonl files as an artifact.
#   summarize-results - reusable workflow that consumes the mock results.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# Default token scope for all jobs; summarize-results overrides it below.
permissions:
  contents: read

env:
  # Single source of truth for the directory the mock evaluation writes to,
  # the artifact upload reads from, and summarize-results receives.
  EVALUATION_RESULTS_DIR: evaluation_results

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Setup Python with UV
        uses: ./.github/actions/setup-python-uv
        with:
          all-extras: true

      - name: Run pre-commit
        uses: pre-commit/action@v3.0.1

      - name: Run tests with coverage
        run: uv run pytest --cov=src/bcbench --cov-report=term-missing

  select-category:
    runs-on: ubuntu-latest
    outputs:
      category: ${{ steps.random.outputs.category }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Setup Python with UV
        uses: ./.github/actions/setup-python-uv

      - name: Select random category
        id: random
        shell: pwsh
        run: |
          # One category per non-empty output line.
          $categories = (uv run bcbench category list) -split "`n" | Where-Object { $_ }
          # Fail here rather than hand an empty category to the downstream jobs.
          if (-not $categories) {
            throw "bcbench category list returned no categories"
          }
          $selected = $categories | Get-Random
          echo "category=$selected" >> $env:GITHUB_OUTPUT

  get-entries:
    needs: select-category
    uses: ./.github/workflows/get-entries.yml
    with:
      test-run: true
      category: ${{ needs.select-category.outputs.category }}

  mock-evaluation:
    name: Test Run for ${{ matrix.entry }}
    runs-on: ubuntu-latest
    needs: [get-entries, select-category]
    # Skip the matrix entirely when the category produced no entries.
    if: needs.get-entries.outputs.entries != '[]'
    outputs:
      # Passed on to summarize-results as its results-dir input.
      results-dir: ${{ env.EVALUATION_RESULTS_DIR }}
    strategy:
      fail-fast: false
      matrix:
        entry: ${{ fromJson(needs.get-entries.outputs.entries) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v5

      - name: Setup Python with UV
        uses: ./.github/actions/setup-python-uv

      - name: Run mock evaluation for ${{ matrix.entry }}
        # Expression values are handed to the shell through environment
        # variables instead of being expanded into the script text, so their
        # content cannot be interpreted as shell syntax.
        env:
          ENTRY: ${{ matrix.entry }}
          CATEGORY: ${{ needs.select-category.outputs.category }}
        run: >-
          uv run bcbench evaluate mock "$ENTRY"
          --category "$CATEGORY"
          --output-dir "$EVALUATION_RESULTS_DIR"
          --run-id "${{ github.run_id }}"

      - name: Upload mock evaluation results
        uses: actions/upload-artifact@v6
        with:
          name: ${{ matrix.entry }}
          path: ${{ env.EVALUATION_RESULTS_DIR }}/**/*.jsonl
          retention-days: 1

  summarize-results:
    needs: [mock-evaluation, select-category]
    uses: ./.github/workflows/summarize-results.yml
    # NOTE(review): presumably the reusable workflow needs to push content and
    # request an OIDC token; confirm against summarize-results.yml.
    permissions:
      contents: write
      id-token: write
    with:
      results-dir: ${{ needs.mock-evaluation.outputs.results-dir }}
      # The run id is passed as the model value for mock runs.
      model: ${{ github.run_id }}
      agent: "mock-agent"
      mock: true
      category: ${{ needs.select-category.outputs.category }}
    secrets: inherit