Skip to content

Commit 862e927

Browse files
committed
fix(workerpals): enforce honest scope and recover shell wrappers
1 parent aeba3f4 commit 862e927

11 files changed

Lines changed: 774 additions & 28 deletions

File tree

apps/workerpals/src/backends/openai_codex/openai_codex_executor.py

Lines changed: 184 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@
9696
_VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
9797
_VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
9898
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
99+
_MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS = 1_200
100+
_MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS = 5_000
99101

100102

101103
def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -1108,6 +1110,177 @@ def _build_wrapper_recovery_guidance(rejected_commands: List[str], *, hard: bool
11081110
return "\n".join(guidance_lines)
11091111

11101112

1113+
def _truncate_wrapper_bootstrap_output(text: str) -> str:
1114+
value = str(text or "").replace("\r\n", "\n").strip()
1115+
if len(value) <= _MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS:
1116+
return value
1117+
return f"{value[:_MAX_WRAPPER_BOOTSTRAP_OUTPUT_CHARS].rstrip()}\n...(truncated)"
1118+
1119+
1120+
def _resolve_repo_scoped_path(repo: str, raw_path: str) -> Optional[Path]:
1121+
candidate = str(raw_path or "").strip()
1122+
if not candidate:
1123+
return None
1124+
repo_root = Path(repo).resolve()
1125+
resolved = (repo_root / candidate).resolve()
1126+
try:
1127+
common = os.path.commonpath([str(repo_root), str(resolved)])
1128+
except ValueError:
1129+
return None
1130+
if common != str(repo_root):
1131+
return None
1132+
return resolved
1133+
1134+
1135+
def _run_wrapper_bootstrap_command(repo: str, command: str) -> str:
1136+
normalized = _normalize_command_text(command)
1137+
if not normalized:
1138+
return ""
1139+
try:
1140+
args = shlex.split(normalized, posix=True)
1141+
except ValueError:
1142+
return ""
1143+
if not args:
1144+
return ""
1145+
program = str(args[0] or "").strip().lower()
1146+
if program == "pwd" and len(args) == 1:
1147+
return repo
1148+
if program == "ls":
1149+
target = Path(repo).resolve()
1150+
if len(args) == 2 and not str(args[1]).startswith("-"):
1151+
resolved = _resolve_repo_scoped_path(repo, str(args[1]))
1152+
if not resolved:
1153+
return ""
1154+
target = resolved
1155+
elif len(args) > 1:
1156+
return ""
1157+
if not target.exists():
1158+
return f"{target.name or str(target)} (missing)"
1159+
if target.is_file():
1160+
return target.name
1161+
entries = sorted(child.name for child in target.iterdir())
1162+
return "\n".join(entries[:120])
1163+
if program == "git" and len(args) >= 2:
1164+
safe_git_args: Optional[List[str]] = None
1165+
if args[1:] == ["branch", "--show-current"]:
1166+
safe_git_args = ["git", "--no-pager", "branch", "--show-current"]
1167+
elif args[1:] == ["status", "--porcelain"]:
1168+
safe_git_args = ["git", "--no-pager", "status", "--porcelain"]
1169+
elif len(args) >= 3 and args[1] == "diff":
1170+
diff_args = list(args[2:])
1171+
sanitized_paths: List[str] = []
1172+
if diff_args == ["--name-only"]:
1173+
safe_git_args = [
1174+
"git",
1175+
"--no-pager",
1176+
"diff",
1177+
"--no-ext-diff",
1178+
"--no-textconv",
1179+
"--name-only",
1180+
]
1181+
elif len(diff_args) >= 2 and diff_args[0] == "--name-only" and diff_args[1] == "--":
1182+
for raw_path in diff_args[2:]:
1183+
resolved = _resolve_repo_scoped_path(repo, str(raw_path))
1184+
if not resolved:
1185+
return ""
1186+
sanitized_paths.append(os.path.relpath(str(resolved), repo))
1187+
safe_git_args = [
1188+
"git",
1189+
"--no-pager",
1190+
"diff",
1191+
"--no-ext-diff",
1192+
"--no-textconv",
1193+
"--name-only",
1194+
"--",
1195+
*sanitized_paths,
1196+
]
1197+
elif diff_args and diff_args[0] == "--":
1198+
for raw_path in diff_args[1:]:
1199+
resolved = _resolve_repo_scoped_path(repo, str(raw_path))
1200+
if not resolved:
1201+
return ""
1202+
sanitized_paths.append(os.path.relpath(str(resolved), repo))
1203+
safe_git_args = [
1204+
"git",
1205+
"--no-pager",
1206+
"diff",
1207+
"--no-ext-diff",
1208+
"--no-textconv",
1209+
"--",
1210+
*sanitized_paths,
1211+
]
1212+
if not safe_git_args:
1213+
return ""
1214+
proc = subprocess.run(
1215+
safe_git_args,
1216+
cwd=repo,
1217+
capture_output=True,
1218+
text=True,
1219+
timeout=15,
1220+
check=False,
1221+
)
1222+
output = proc.stdout.strip()
1223+
if proc.returncode != 0:
1224+
detail = proc.stderr.strip() or output
1225+
return f"(command failed: {detail})" if detail else "(command failed)"
1226+
return output
1227+
if program == "cat" and len(args) == 2:
1228+
resolved = _resolve_repo_scoped_path(repo, str(args[1]))
1229+
if not resolved or not resolved.is_file():
1230+
return ""
1231+
return resolved.read_text(encoding="utf-8", errors="replace")
1232+
if program == "sed" and len(args) == 4 and args[1] == "-n":
1233+
match = re.fullmatch(r"(\d+),(\d+)p", str(args[2] or "").strip())
1234+
if not match:
1235+
return ""
1236+
start = max(1, int(match.group(1)))
1237+
end = max(start, int(match.group(2)))
1238+
resolved = _resolve_repo_scoped_path(repo, str(args[3]))
1239+
if not resolved or not resolved.is_file():
1240+
return ""
1241+
lines = resolved.read_text(encoding="utf-8", errors="replace").splitlines()
1242+
return "\n".join(lines[start - 1 : end])
1243+
return ""
1244+
1245+
1246+
def _build_wrapper_bootstrap_context(repo: str, rejected_commands: List[str]) -> str:
1247+
blocks: List[str] = []
1248+
total_chars = 0
1249+
seen: set[str] = set()
1250+
for rejected in rejected_commands:
1251+
direct = _unwrap_shell_wrapper_command(rejected)
1252+
key = direct.lower()
1253+
if not direct or key in seen:
1254+
continue
1255+
seen.add(key)
1256+
output = _run_wrapper_bootstrap_command(repo, direct)
1257+
if not output:
1258+
continue
1259+
truncated = _truncate_wrapper_bootstrap_output(output)
1260+
block = (
1261+
f"- Direct command: `{direct}`\n"
1262+
f" Rejected wrapper: `{rejected}`\n"
1263+
" Output:\n"
1264+
" ```text\n"
1265+
f"{truncated}\n"
1266+
" ```"
1267+
)
1268+
if total_chars + len(block) > _MAX_WRAPPER_BOOTSTRAP_TOTAL_CHARS and blocks:
1269+
break
1270+
blocks.append(block)
1271+
total_chars += len(block)
1272+
if not blocks:
1273+
return ""
1274+
return "\n".join(
1275+
[
1276+
"Direct command context bootstrap:",
1277+
"The backend already ran safe read-only direct replacements for some rejected wrapper commands.",
1278+
"Use these outputs as current repo context and do not rerun the wrapped variants.",
1279+
*blocks[:6],
1280+
]
1281+
)
1282+
1283+
11111284
def _merge_usage_records(first: Any, second: Any) -> Dict[str, Any]:
11121285
first_record = first if isinstance(first, dict) else {}
11131286
second_record = second if isinstance(second, dict) else {}
@@ -1547,18 +1720,28 @@ def _drain_stderr() -> None:
15471720
hard=hard_recovery,
15481721
)
15491722
if recovery_guidance:
1723+
bootstrap_context = (
1724+
_build_wrapper_bootstrap_context(repo, rejected_shell_wrappers)
1725+
if hard_recovery
1726+
else ""
1727+
)
15501728
log.warning(
15511729
"Codex hit a shell-wrapper rejection loop; retrying once with "
15521730
+ (
15531731
"strict no-wrapper recovery guidance."
1732+
+ (" Added direct-command context bootstrap." if bootstrap_context else "")
15541733
if hard_recovery
15551734
else "direct-command recovery guidance."
15561735
)
15571736
)
15581737
retry_result = _run_codex_task(
15591738
repo,
15601739
instruction,
1561-
[*effective_supplemental_guidance, recovery_guidance],
1740+
[
1741+
*effective_supplemental_guidance,
1742+
*( [bootstrap_context] if bootstrap_context else [] ),
1743+
recovery_guidance,
1744+
],
15621745
wrapper_recovery_attempt=wrapper_recovery_attempt + 1,
15631746
baseline_changes=baseline_snapshot,
15641747
)

apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py

Lines changed: 58 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from openai_codex_executor import (
2525
OpenAICodexRuntimeConfig,
2626
_augment_supplemental_guidance,
27+
_build_wrapper_bootstrap_context,
2728
_build_wrapper_recovery_guidance,
2829
_run_codex_task,
2930
_resolve_reasoning_effort,
@@ -295,6 +296,56 @@ def test_wrapper_hard_recovery_guidance_requires_direct_replacements_first(self)
295296
self.assertIn("first command invocation on this retry must be one of the direct replacements", lowered)
296297
self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
297298

299+
def test_build_wrapper_bootstrap_context_runs_safe_direct_replacements(self) -> None:
300+
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-bootstrap-") as temp_dir:
301+
repo = Path(temp_dir) / "repo"
302+
repo.mkdir(parents=True, exist_ok=True)
303+
(repo / "README.md").write_text("# wrapper bootstrap test\n", encoding="utf-8")
304+
subprocess.run(["git", "init"], cwd=repo, check=True, capture_output=True, text=True)
305+
subprocess.run(
306+
["git", "config", "user.name", "PushPals Test"],
307+
cwd=repo,
308+
check=True,
309+
capture_output=True,
310+
text=True,
311+
)
312+
subprocess.run(
313+
["git", "config", "user.email", "pushpals-tests@example.com"],
314+
cwd=repo,
315+
check=True,
316+
capture_output=True,
317+
text=True,
318+
)
319+
subprocess.run(["git", "add", "README.md"], cwd=repo, check=True, capture_output=True, text=True)
320+
subprocess.run(
321+
["git", "commit", "-m", "chore: seed wrapper bootstrap repo"],
322+
cwd=repo,
323+
check=True,
324+
capture_output=True,
325+
text=True,
326+
)
327+
328+
guidance = _build_wrapper_bootstrap_context(
329+
str(repo),
330+
[
331+
"/bin/bash -lc pwd",
332+
"/bin/bash -c pwd",
333+
"/bin/bash -lc ls",
334+
"/bin/bash -lc 'git branch --show-current'",
335+
"/bin/bash -lc 'cat README.md'",
336+
"/bin/bash -lc 'git diff --output=leak.txt'",
337+
],
338+
)
339+
340+
self.assertIn("Direct command context bootstrap:", guidance)
341+
self.assertIn("Direct command: `pwd`", guidance)
342+
self.assertIn("Direct command: `ls`", guidance)
343+
self.assertIn("Direct command: `git branch --show-current`", guidance)
344+
self.assertIn("Direct command: `cat README.md`", guidance)
345+
self.assertIn("README.md", guidance)
346+
self.assertIn("# wrapper bootstrap test", guidance)
347+
self.assertNotIn("git diff --output=leak.txt", guidance)
348+
298349
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
299350
with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
300351
repo = Path(temp_dir) / "repo"
@@ -341,13 +392,16 @@ def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
341392
"",
342393
"prompt = sys.stdin.read()",
343394
"hard_marker = 'Your first command invocation on this retry must be one of the direct replacements listed below'",
344-
"if hard_marker in prompt:",
395+
"bootstrap_marker = 'Direct command context bootstrap:'",
396+
"pwd_marker = 'Direct command: `pwd`'",
397+
"branch_marker = 'Direct command: `git branch --show-current`'",
398+
"if hard_marker in prompt and bootstrap_marker in prompt and pwd_marker in prompt and branch_marker in prompt:",
345399
" if last_message_path:",
346400
" Path(last_message_path).write_text(",
347-
" 'Recovered by switching to direct commands after strict wrapper recovery.',",
401+
" 'Recovered by using backend-supplied direct command bootstrap after strict wrapper recovery.',",
348402
" encoding='utf-8',",
349403
" )",
350-
" print('item.completed | Used direct commands after strict recovery guidance.', flush=True)",
404+
" print('item.completed | Used backend bootstrap context after strict recovery guidance.', flush=True)",
351405
" sys.exit(0)",
352406
"",
353407
"for line in (",
@@ -380,6 +434,7 @@ def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
380434
self.assertTrue(result.get("ok"), result)
381435
self.assertIn("Recovered after Codex attempts hit command-router shell-wrapper rejections.", str(result.get("stdout") or ""))
382436
self.assertIn("strict wrapper recovery", str(result.get("stdout") or "").lower())
437+
self.assertIn("backend-supplied direct command bootstrap", str(result.get("stdout") or ""))
383438

384439
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
385440
usage = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")

apps/workerpals/src/execute_job.ts

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,13 @@ export function shouldEnqueueNoChangeReviewCompletion(
333333
return extractReviewFixContext(params) == null;
334334
}
335335

336+
function reviewAgentAllowsMultiRootScope(value: unknown): boolean {
337+
const normalized = String(value ?? "")
338+
.trim()
339+
.toLowerCase();
340+
return normalized === "review_fix" || normalized === "merge_conflict";
341+
}
342+
336343
export function deriveQualityGatePolicy(
337344
params: Record<string, unknown> | null | undefined,
338345
runtimeConfig: WorkerpalsRuntimeConfig = DEFAULT_CONFIG,
@@ -3217,6 +3224,7 @@ function validateTaskExecutePlanning(
32173224
options?: {
32183225
origin?: "autonomy" | "user";
32193226
autonomyComponentArea?: unknown;
3227+
reviewAgentResolutionType?: unknown;
32203228
},
32213229
): { ok: true } | { ok: false; message: string } {
32223230
if (!value || typeof value !== "object" || Array.isArray(value)) {
@@ -3300,21 +3308,26 @@ function validateTaskExecutePlanning(
33003308
const normalizedWriteGlobs = isStringArray(scope.writeGlobs)
33013309
? toStringArray(scope.writeGlobs)
33023310
: [];
3311+
const allowMultiRootAutonomyScope =
3312+
origin === "autonomy" &&
3313+
reviewAgentAllowsMultiRootScope(options?.reviewAgentResolutionType);
33033314
if (origin === "autonomy") {
33043315
const declaredComponentArea = asAutonomyComponentArea(options?.autonomyComponentArea);
3305-
const inferredComponentArea = deriveAutonomyComponentArea(
3306-
normalizedTargetPaths,
3307-
normalizedWriteGlobs,
3308-
);
3309-
const componentArea = declaredComponentArea ?? inferredComponentArea;
3310-
if (!componentArea) {
3316+
const inferredComponentArea = allowMultiRootAutonomyScope
3317+
? null
3318+
: deriveAutonomyComponentArea(normalizedTargetPaths, normalizedWriteGlobs);
3319+
const componentArea = allowMultiRootAutonomyScope
3320+
? declaredComponentArea
3321+
: declaredComponentArea ?? inferredComponentArea;
3322+
if (!allowMultiRootAutonomyScope && !componentArea) {
33113323
return {
33123324
ok: false,
33133325
message:
33143326
"task.execute planning.targetPaths must resolve to a repo-relative componentArea",
33153327
};
33163328
}
33173329
if (
3330+
!allowMultiRootAutonomyScope &&
33183331
declaredComponentArea &&
33193332
inferredComponentArea &&
33203333
declaredComponentArea !== inferredComponentArea
@@ -3328,7 +3341,7 @@ function validateTaskExecutePlanning(
33283341
componentArea,
33293342
normalizedTargetPaths,
33303343
normalizedWriteGlobs,
3331-
{ requireWriteGlobs: false },
3344+
{ requireWriteGlobs: false, allowMultipleComponentRoots: allowMultiRootAutonomyScope },
33323345
);
33333346
if (!validatedScope.ok) {
33343347
return {
@@ -3686,9 +3699,14 @@ export async function executeJob(
36863699
params.autonomy && typeof params.autonomy === "object" && !Array.isArray(params.autonomy)
36873700
? (params.autonomy as Record<string, unknown>)
36883701
: null;
3702+
const reviewAgent =
3703+
params.reviewAgent && typeof params.reviewAgent === "object" && !Array.isArray(params.reviewAgent)
3704+
? (params.reviewAgent as Record<string, unknown>)
3705+
: null;
36893706
const planningValidation = validateTaskExecutePlanning(params.planning, {
36903707
origin,
36913708
autonomyComponentArea: autonomyScope?.componentArea ?? autonomyScope?.component_area,
3709+
reviewAgentResolutionType: reviewAgent?.resolutionType,
36923710
});
36933711
if (!planningValidation.ok) {
36943712
return {

0 commit comments

Comments
 (0)