Skip to content

Commit e00d063

Browse files
haoranpb and Copilot authored
Extracts generic changes identified when onboarding code review category (#655)
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent e5f50e3 commit e00d063

12 files changed

Lines changed: 147 additions & 70 deletions

File tree

.github/workflows/claude-evaluation.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ jobs:
6060
category: ${{ inputs.category }}
6161

6262
evaluate-with-claude-code:
63-
runs-on: [ GitHub-BCBench ]
63+
runs-on: ${{ needs.get-entries.outputs.runner }}
6464
needs: get-entries
6565
outputs:
6666
results-dir: ${{ env.EVALUATION_RESULTS_DIR }}
@@ -91,6 +91,7 @@ jobs:
9191
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
9292
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
9393
github-token: ${{ secrets.GITHUB_TOKEN }}
94+
skip-container: ${{ needs.get-entries.outputs.requires-container != 'true' }}
9495

9596
- name: Setup Python with UV
9697
uses: ./.github/actions/setup-python-uv
@@ -110,6 +111,7 @@ jobs:
110111

111112
- name: Run Claude Code for entry ${{ matrix.entry }}
112113
timeout-minutes: 120
114+
shell: pwsh
113115
env:
114116
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
115117
run: |

.github/workflows/copilot-evaluation.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ jobs:
6767
category: ${{ inputs.category }}
6868

6969
evaluate-with-copilot-cli:
70-
runs-on: [ GitHub-BCBench ]
70+
runs-on: ${{ needs.get-entries.outputs.runner }}
7171
needs: get-entries
7272
outputs:
7373
results-dir: ${{ env.EVALUATION_RESULTS_DIR }}
@@ -98,6 +98,7 @@ jobs:
9898
azure-client-id: ${{ secrets.AZURE_CLIENT_ID }}
9999
azure-tenant-id: ${{ secrets.AZURE_TENANT_ID }}
100100
github-token: ${{ secrets.GITHUB_TOKEN }}
101+
skip-container: ${{ needs.get-entries.outputs.requires-container != 'true' }}
101102

102103
- name: Setup Python with UV
103104
uses: ./.github/actions/setup-python-uv
@@ -123,6 +124,7 @@ jobs:
123124
124125
- name: Run GitHub Copilot CLI for entry ${{ matrix.entry }}
125126
timeout-minutes: 120
127+
shell: pwsh
126128
env:
127129
COPILOT_GITHUB_TOKEN: ${{ steps.select-pat.outputs.pat_index == '0' &&
128130
secrets.COPILOT_PAT || (steps.select-pat.outputs.pat_index == '1' &&

.github/workflows/get-entries.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,20 @@ on:
2424
entries:
2525
description: JSON array of dataset entries
2626
value: ${{ jobs.get-entries.outputs.entries }}
27+
runner:
28+
description: GitHub Actions runner label to evaluate this category on
29+
value: ${{ jobs.get-entries.outputs.runner }}
30+
requires-container:
31+
description: Whether this category needs a BC container ("true"/"false")
32+
value: ${{ jobs.get-entries.outputs.requires-container }}
2733

2834
jobs:
2935
get-entries:
3036
runs-on: ubuntu-latest
3137
outputs:
3238
entries: ${{ steps.get-entries.outputs.entries }}
39+
runner: ${{ steps.runtime-config.outputs.runner }}
40+
requires-container: ${{ steps.runtime-config.outputs.requires-container }}
3341
steps:
3442
- name: Checkout repository
3543
uses: actions/checkout@v5
@@ -39,6 +47,10 @@ jobs:
3947
- name: Setup Python with UV
4048
uses: ./.github/actions/setup-python-uv
4149

50+
- name: Get runtime config for matrix
  id: runtime-config
  # Pass the workflow input through the environment instead of interpolating
  # `${{ ... }}` into the script text: an interpolated expression is pasted into
  # the shell source before it runs, so a crafted value could inject commands.
  env:
    CATEGORY: ${{ inputs.category }}
  run: uv run bcbench category runtime-config --category "$CATEGORY"
53+
4254
- name: Get entries for matrix
4355
id: get-entries
4456
run: |

src/bcbench/agent/copilot/agent.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ def run_copilot_agent(
4949
logger.info(f"Executing Copilot CLI in directory: {repo_path}")
5050
logger.debug(f"Using prompt:\n{prompt}")
5151

52-
copilot_cmd = shutil.which("copilot.cmd") or shutil.which("copilot")
52+
# Prefer copilot.exe over copilot.bat/copilot.cmd shims on Windows: the .bat shim invokes PowerShell,
53+
# which re-parses arguments and corrupts prompts containing double quotes (e.g. JSON examples).
54+
copilot_cmd = shutil.which("copilot.exe") or shutil.which("copilot.cmd") or shutil.which("copilot")
5355
if not copilot_cmd:
5456
raise AgentError("Copilot CLI not found in PATH. Please ensure it is installed and available.")
5557

src/bcbench/commands/category.py

Lines changed: 21 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,9 @@
1-
import os
21
import sys
3-
from pathlib import Path
42

53
import typer
6-
from typing_extensions import Annotated
74

85
from bcbench.cli_options import EvaluationCategoryOption
6+
from bcbench.github_actions import write_step_outputs
97
from bcbench.types import EvaluationCategory
108

119
category_app = typer.Typer(help="Category-specific configuration helpers")
@@ -19,32 +17,26 @@ def list_categories() -> None:
1917

2018

2119
@category_app.command("bceval-config")
22-
def bceval_config(
23-
category: EvaluationCategoryOption,
24-
github_output: Annotated[
25-
Path | None,
26-
typer.Option(envvar="GITHUB_OUTPUT", help="Append outputs to this file (typically $GITHUB_OUTPUT)"),
27-
] = None,
28-
) -> None:
20+
def bceval_config(category: EvaluationCategoryOption) -> None:
2921
"""
30-
Print the bc-eval evaluator list and core score for a category as key=value lines.
22+
Emit the bc-eval evaluator list and core score for a category as step outputs.
3123
32-
When run inside a GitHub Actions step with $GITHUB_OUTPUT set, the lines are
33-
appended to that file so they become step outputs. Otherwise they're written
34-
to stdout.
24+
The lines are appended to $GITHUB_OUTPUT so they become GitHub Actions step outputs. Outside of Actions nothing is written.
3525
"""
36-
lines: list[str] = [
37-
f"evaluators={','.join(category.evaluators)}",
38-
f"core_score={category.core_score}",
39-
]
40-
payload: str = "\n".join(lines) + "\n"
41-
42-
if github_output:
43-
with open(github_output, "a", encoding="utf-8") as file:
44-
file.write(payload)
45-
else:
46-
sys.stdout.write(payload)
47-
48-
# Always echo to stderr so workflow logs show what was emitted.
49-
if os.getenv("GITHUB_ACTIONS"):
50-
sys.stderr.write(payload)
26+
write_step_outputs(
27+
{
28+
"evaluators": ",".join(category.evaluators),
29+
"core_score": category.core_score,
30+
}
31+
)
32+
33+
34+
@category_app.command("runtime-config")
def runtime_config(category: EvaluationCategoryOption) -> None:
    """Emit the GitHub Actions runner label and container requirement for a category."""
    # The container flag is rendered via str(...).lower(), so a Python bool
    # becomes the lowercase "true"/"false" text that workflow expressions compare.
    needs_container = str(category.requires_container).lower()
    step_outputs = {
        "runner": category.runner,
        "requires-container": needs_container,
    }
    write_step_outputs(step_outputs)

src/bcbench/commands/dataset.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,9 @@
66
from typing_extensions import Annotated
77

88
from bcbench.cli_options import EvaluationCategoryOption
9-
from bcbench.config import get_config
109
from bcbench.dataset import BaseDatasetEntry
1110
from bcbench.dataset.dataset_entry import _BugFixTestGenBase
12-
from bcbench.exceptions import ConfigurationError
11+
from bcbench.github_actions import write_step_outputs
1312
from bcbench.logger import get_logger
1413
from bcbench.types import EvaluationCategory
1514

@@ -60,7 +59,7 @@ def list_entries(
6059
print(f" - {entry_id}")
6160

6261
if github_output:
63-
_write_github_output(github_output, json.dumps(entry_ids))
62+
write_step_outputs({github_output: json.dumps(entry_ids)})
6463

6564

6665
@dataset_app.command("view")
@@ -150,12 +149,3 @@ def _modified_instance_ids_from_diff(diff_output: str) -> list[str]:
150149
instance_ids.append(entry_data["instance_id"])
151150

152151
return instance_ids
153-
154-
155-
def _write_github_output(key: str, value: str) -> None:
156-
"""Write a value to GitHub Actions output."""
157-
config = get_config()
158-
if not config.env.github_output:
159-
raise ConfigurationError("GITHUB_OUTPUT environment variable not set. This feature is only available when running in GitHub Actions.")
160-
with open(config.env.github_output, "a", encoding="utf-8") as f:
161-
f.write(f"{key}={value}\n")

src/bcbench/evaluate/bugfix.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
from bcbench.dataset import BugFixEntry
55
from bcbench.evaluate.base import EvaluationPipeline
66
from bcbench.exceptions import BuildError, TestExecutionError
7-
from bcbench.logger import get_logger, github_log_group
7+
from bcbench.github_actions import github_log_group
8+
from bcbench.logger import get_logger
89
from bcbench.operations import (
910
apply_patch,
1011
build_and_publish_projects,

src/bcbench/evaluate/testgeneration.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88
from bcbench.dataset import TestEntry, TestGenEntry
99
from bcbench.evaluate.base import EvaluationPipeline
1010
from bcbench.exceptions import BuildError, NoTestsExtractedError, TestExecutionError
11-
from bcbench.logger import get_logger, github_log_group
11+
from bcbench.github_actions import github_log_group
12+
from bcbench.logger import get_logger
1213
from bcbench.operations import (
1314
apply_patch,
1415
build_and_publish_projects,

src/bcbench/github_actions.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
"""Helpers for interacting with GitHub Actions.
2+
3+
These wrap GitHub Actions workflow features (step outputs, log groups) and are no-ops when not running inside Actions.
4+
"""
5+
6+
from __future__ import annotations
7+
8+
import os
9+
from collections.abc import Iterator
10+
from contextlib import contextmanager
11+
12+
from bcbench.config import get_config
13+
from bcbench.logger import get_logger
14+
15+
__all__ = ["github_log_group", "write_step_outputs"]
16+
17+
logger = get_logger(__name__)
18+
19+
20+
def write_step_outputs(outputs: dict[str, str]) -> None:
    """Append step outputs to the GitHub Actions output file.

    The values become outputs of the current workflow step, available to downstream steps via ``steps.<id>.outputs.<key>``.

    Single-line values are written as ``key=value``. Values containing a line break are written with the
    ``key<<DELIMITER`` multiline form instead, because a raw line break inside a ``key=value`` line would
    corrupt the file or let the value define additional outputs.

    Args:
        outputs: Mapping of output names to their values. Each value is rendered with ``str()``.

    Note:
        When not running inside GitHub Actions (``$GITHUB_OUTPUT`` is unset), nothing is written and a warning is logged.
    """
    import secrets  # local import keeps this block self-contained; only needed for multiline delimiters

    github_output: str | None = os.getenv("GITHUB_OUTPUT")
    if not github_output:
        logger.warning("Not running in GitHub Actions; skipping step outputs: %s", ", ".join(outputs))
        return

    lines: list[str] = []
    for key, value in outputs.items():
        text = str(value)
        if "\n" in text or "\r" in text:
            # Random delimiter so the value itself cannot terminate the block early.
            delimiter = f"ghadelimiter_{secrets.token_hex(16)}"
            while delimiter in text:
                delimiter = f"ghadelimiter_{secrets.token_hex(16)}"
            lines.append(f"{key}<<{delimiter}\n{text}\n{delimiter}\n")
        else:
            lines.append(f"{key}={text}\n")

    with open(github_output, "a", encoding="utf-8") as file:
        file.writelines(lines)
38+
39+
40+
@contextmanager
def github_log_group(title: str) -> Iterator[None]:
    """Wrap the enclosed block in a collapsible GitHub Actions log group.

    When the config reports that we are running in GitHub Actions, ``::group::<title>`` is printed
    before the block and ``::endgroup::`` after it, even if the block raises. Otherwise the block
    runs with no extra output.

    Args:
        title: Text placed after ``::group::`` in the opening marker.
    """
    if not get_config().env.github_actions:
        # Outside Actions: run the body without emitting any markers.
        yield
        return

    print(f"::group::{title}", flush=True)  # noqa: T201
    try:
        yield
    finally:
        # Always close the group so later log lines are not folded into it.
        print("::endgroup::", flush=True)  # noqa: T201

src/bcbench/logger.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,11 @@
33
import logging
44
import re
55
import sys
6-
from collections.abc import Iterator
7-
from contextlib import contextmanager
86
from typing import ClassVar
97

108
from bcbench.config import get_config
119

12-
__all__ = ["get_logger", "github_log_group", "setup_logger"]
10+
__all__ = ["get_logger", "setup_logger"]
1311

1412

1513
class SensitiveDataFilter(logging.Filter):
@@ -217,17 +215,3 @@ def get_logger(name: str) -> logging.Logger:
217215
name = f"bcbench.{name}"
218216

219217
return logging.getLogger(name)
220-
221-
222-
@contextmanager
223-
def github_log_group(title: str) -> Iterator[None]:
224-
config = get_config()
225-
226-
if config.env.github_actions:
227-
print(f"::group::{title}", flush=True) # noqa: T201
228-
229-
try:
230-
yield
231-
finally:
232-
if config.env.github_actions:
233-
print("::endgroup::", flush=True) # noqa: T201

0 commit comments

Comments
 (0)