Skip to content

Commit e5e9e19

Browse files
committed
fix(workerpals): harden codex wrapper-shell recovery
1 parent 6de923d commit e5e9e19

7 files changed

Lines changed: 306 additions & 18 deletions

File tree

apps/workerpals/src/backends/openai_codex/openai_codex_executor.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
_VALID_COLORS = {"always", "never", "auto"}
9696
_VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
9797
_VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
98+
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
9899

99100

100101
def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -296,6 +297,21 @@ def _command_router_recovery_guidance() -> str:
296297
)
297298

298299

300+
def _command_router_hard_recovery_guidance() -> str:
    """Return the escalated ("hard") shell-wrapper recovery guidance text.

    Looks up the "Hard Recovery Guidance" H2 section of the command-router
    policy file via ``_load_markdown_h2_section``. When that lookup yields a
    truthy value it is returned as-is; otherwise the built-in fallback text
    defined below is returned.
    """
    policy_section = _load_markdown_h2_section(
        _COMMAND_ROUTER_POLICY_PATH,
        "Hard Recovery Guidance",
    )
    if not policy_section:
        # Built-in fallback used whenever the policy lookup comes back empty.
        builtin_fallback = (
            "Command-router escalation: the previous retry still attempted disallowed shell wrappers.\n"
            "Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, "
            "or `pwsh.exe` as the command itself on this attempt.\n"
            "Your first command invocation on this retry must be one of the direct replacements listed "
            "below, with no wrapper shell around it.\n"
            "After you re-establish repo context, continue using ordinary shell commands directly "
            "without wrapper shells."
        )
        return builtin_fallback
    return policy_section
313+
314+
299315
def _command_router_rejection_detail_intro() -> str:
300316
guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Rejection Detail")
301317
if guidance:
@@ -1066,7 +1082,7 @@ def _unwrap_shell_wrapper_command(command: str) -> str:
10661082
return ""
10671083

10681084

1069-
def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
1085+
def _build_wrapper_direct_replacements(rejected_commands: List[str]) -> List[str]:
10701086
direct_equivalents: List[str] = []
10711087
seen: set[str] = set()
10721088
for command in rejected_commands:
@@ -1076,7 +1092,16 @@ def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
10761092
continue
10771093
seen.add(lowered)
10781094
direct_equivalents.append(f"- `{command}` -> `{direct}`")
1079-
guidance_lines = [_command_router_recovery_guidance()]
1095+
return direct_equivalents
1096+
1097+
1098+
def _build_wrapper_recovery_guidance(rejected_commands: List[str], *, hard: bool = False) -> str:
1099+
guidance_lines = [
1100+
_command_router_hard_recovery_guidance()
1101+
if hard
1102+
else _command_router_recovery_guidance()
1103+
]
1104+
direct_equivalents = _build_wrapper_direct_replacements(rejected_commands)
10801105
if direct_equivalents:
10811106
guidance_lines.append("Use these direct replacements for the rejected commands:")
10821107
guidance_lines.extend(direct_equivalents[:6])
@@ -1515,11 +1540,20 @@ def _drain_stderr() -> None:
15151540
log_git_status(repo, log)
15161541

15171542
if command_policy_rejection_loop:
1518-
if wrapper_recovery_attempt < 1:
1519-
recovery_guidance = _build_wrapper_recovery_guidance(rejected_shell_wrappers)
1543+
if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
1544+
hard_recovery = wrapper_recovery_attempt >= 1
1545+
recovery_guidance = _build_wrapper_recovery_guidance(
1546+
rejected_shell_wrappers,
1547+
hard=hard_recovery,
1548+
)
15201549
if recovery_guidance:
15211550
log.warning(
1522-
"Codex hit a shell-wrapper rejection loop; retrying once with direct-command recovery guidance."
1551+
"Codex hit a shell-wrapper rejection loop; retrying once with "
1552+
+ (
1553+
"strict no-wrapper recovery guidance."
1554+
if hard_recovery
1555+
else "direct-command recovery guidance."
1556+
)
15231557
)
15241558
retry_result = _run_codex_task(
15251559
repo,
@@ -1529,19 +1563,19 @@ def _drain_stderr() -> None:
15291563
baseline_changes=baseline_snapshot,
15301564
)
15311565
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
1532-
if retry_result.get("ok"):
1566+
if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
15331567
recovered_stdout = str(retry_result.get("stdout") or "").strip()
15341568
retry_result["stdout"] = _truncate(
15351569
(
1536-
"Recovered after the first Codex attempt hit command-router shell-wrapper rejections.\n\n"
1570+
"Recovered after Codex attempts hit command-router shell-wrapper rejections.\n\n"
15371571
f"{recovered_stdout}"
15381572
).strip()
15391573
)
1540-
else:
1574+
elif wrapper_recovery_attempt == 0:
15411575
retry_stderr = str(retry_result.get("stderr") or "").strip()
15421576
retry_result["stderr"] = _truncate(
15431577
(
1544-
"The first Codex attempt hit command-router shell-wrapper rejections and was retried once with direct-command recovery guidance.\n\n"
1578+
"Earlier Codex attempts hit command-router shell-wrapper rejections and were retried with stricter recovery guidance.\n\n"
15451579
f"{retry_stderr}"
15461580
).strip()
15471581
)

apps/workerpals/src/backends/openai_codex/test_openai_codex_runtime_config.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import os
22
import re
3+
import json
4+
import subprocess
35
import sys
46
import unittest
57
import tempfile
8+
from unittest import mock
69
from pathlib import Path
710

811
_HERE = Path(__file__).resolve().parent
@@ -22,6 +25,7 @@
2225
OpenAICodexRuntimeConfig,
2326
_augment_supplemental_guidance,
2427
_build_wrapper_recovery_guidance,
28+
_run_codex_task,
2529
_resolve_reasoning_effort,
2630
_build_instruction,
2731
_collect_disallowed_shell_wrapper_rejections,
@@ -280,6 +284,103 @@ def test_wrapper_recovery_guidance_allows_arbitrary_shell_commands_without_wrapp
280284
self.assertIn("not limited to a fixed allowlist", lowered)
281285
self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
282286

287+
def test_wrapper_hard_recovery_guidance_requires_direct_replacements_first(self) -> None:
    """Hard-mode guidance must carry the escalation wording and a direct replacement."""
    rejected_commands = ["/bin/bash -lc 'git status --porcelain'", "/bin/bash -lc pwd"]
    guidance = _build_wrapper_recovery_guidance(rejected_commands, hard=True)

    # The wording checks are case-insensitive; they run in a fixed order and
    # stop at the first failure, exactly like sequential assertIn calls.
    lowered = guidance.lower()
    for expected_fragment in (
        "previous retry still attempted disallowed shell wrappers",
        "do not invoke `bash`",
        "first command invocation on this retry must be one of the direct replacements",
    ):
        self.assertIn(expected_fragment, lowered)

    # The replacement mapping is checked against the original-case text.
    self.assertIn("`/bin/bash -lc 'git status --porcelain'` -> `git status --porcelain`", guidance)
297+
298+
def test_run_codex_task_escalates_wrapper_recovery_and_recovers(self) -> None:
    """Run ``_run_codex_task`` against a fake Codex binary that needs hard recovery.

    The fake binary reads its prompt from stdin. Unless the prompt contains
    the hard-recovery marker sentence, it prints four shell-wrapper rejection
    errors to stderr and then sleeps. Once the marker is present it writes a
    final message to the ``--output-last-message`` path (when given), prints a
    completion line, and exits 0.

    The test asserts that the overall result is ok and that stdout carries
    both the recovery banner and the stub's final message.
    """
    with tempfile.TemporaryDirectory(prefix="pushpals-codex-wrapper-recovery-") as temp_dir:
        repo = Path(temp_dir) / "repo"
        repo.mkdir(parents=True, exist_ok=True)
        (repo / "README.md").write_text("# wrapper recovery test\n", encoding="utf-8")

        # Seed a real git repository with a single commit for the task to run in.
        for git_args in (
            ["init"],
            ["config", "user.name", "PushPals Test"],
            ["config", "user.email", "pushpals-tests@example.com"],
            ["add", "README.md"],
            ["commit", "-m", "chore: seed wrapper recovery repo"],
        ):
            subprocess.run(
                ["git", *git_args],
                cwd=repo,
                check=True,
                capture_output=True,
                text=True,
            )

        # The stub is written out as a standalone Python script, so the
        # leading whitespace inside these literals is significant: nested
        # bodies must be indented relative to their `for`/`if` headers or the
        # stub fails with IndentationError before it can emulate Codex.
        stub_lines = [
            "from pathlib import Path",
            "import sys",
            "import time",
            "",
            "argv = sys.argv[1:]",
            "last_message_path = None",
            "for index, arg in enumerate(argv):",
            "    if arg == '--output-last-message' and index + 1 < len(argv):",
            "        last_message_path = argv[index + 1]",
            "        break",
            "",
            "prompt = sys.stdin.read()",
            "hard_marker = 'Your first command invocation on this retry must be one of the direct replacements listed below'",
            "if hard_marker in prompt:",
            "    if last_message_path:",
            "        Path(last_message_path).write_text(",
            "            'Recovered by switching to direct commands after strict wrapper recovery.',",
            "            encoding='utf-8',",
            "        )",
            "    print('item.completed | Used direct commands after strict recovery guidance.', flush=True)",
            "    sys.exit(0)",
            "",
            "for line in (",
            "    'error=exec_command failed for `/bin/bash -lc pwd`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc \\'git branch --show-current\\'`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc ls`: CreateProcess { message: \"Rejected\" }',",
            "    'error=exec_command failed for `/bin/bash -lc \\'git status --porcelain\\'`: CreateProcess { message: \"Rejected\" }',",
            "):",
            "    print(line, file=sys.stderr, flush=True)",
            "time.sleep(10)",
        ]
        stub_path = Path(temp_dir) / "fake_codex_wrapper_recovery.py"
        stub_path.write_text("\n".join(stub_lines), encoding="utf-8")

        # Point the executor at the stub (run with the current interpreter)
        # and supply the auth/timeout settings for the duration of the call.
        env_overrides = {
            "PUSHPALS_OPENAI_CODEX_BIN_JSON": json.dumps([sys.executable, str(stub_path)]),
            "PUSHPALS_OPENAI_CODEX_AUTH_MODE": "api_key",
            "OPENAI_API_KEY": "pushpals-wrapper-recovery-test-key",
            "WORKERPALS_OPENAI_CODEX_TIMEOUT_S": "10",
            "WORKERPALS_OPENAI_CODEX_PROGRESS_LOG_INTERVAL_S": "1",
        }
        with mock.patch.dict(os.environ, env_overrides, clear=False):
            result = _run_codex_task(
                str(repo),
                "Inspect the repo and report the current branch.",
                [],
            )

        self.assertTrue(result.get("ok"), result)
        stdout_text = str(result.get("stdout") or "")
        self.assertIn(
            "Recovered after Codex attempts hit command-router shell-wrapper rejections.",
            stdout_text,
        )
        self.assertIn("strict wrapper recovery", stdout_text.lower())
383+
283384
def test_usage_falls_back_to_estimate_when_trace_has_no_usage(self) -> None:
284385
usage = _usage_from_trace_or_estimate({}, "abc" * 30, "done", model="gpt-5.4")
285386
self.assertTrue(usage["estimated"])

packages/cli/runtime/prompts/workerpals/openai_codex_command_router_policy.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,11 @@ Command-router recovery: the previous attempt retried disallowed shell wrappers.
66
Retry once using shell commands normally, but invoke the inner command directly instead of wrapping it in `/bin/bash -lc`, `bash -c`, `sh -lc`, `cmd /c`, `powershell -Command`, or `pwsh -Command`.
77
You are not limited to a fixed allowlist of commands. The constraint is only that command execution must target the actual program/argv directly rather than a wrapper shell.
88

9+
## Hard Recovery Guidance
10+
Command-router escalation: the previous retry still attempted disallowed shell wrappers.
11+
Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, or `pwsh.exe` as the command itself on this attempt.
12+
Your first command invocation on this retry must be one of the direct replacements listed below, with no wrapper shell around it.
13+
After you re-establish repo context, continue using ordinary shell commands directly without wrapper shells.
14+
915
## Rejection Detail
1016
Codex repeatedly attempted disallowed shell-wrapper commands that the command router rejected. Shell commands are allowed, but wrapper shells are not; invoke the inner command directly and avoid wrapper retries.

packages/cli/runtime/sandbox/apps/workerpals/src/backends/openai_codex/openai_codex_executor.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
_VALID_COLORS = {"always", "never", "auto"}
9696
_VALID_AUTH_MODES = {"auto", "api_key", "chatgpt"}
9797
_VALID_REASONING_EFFORTS = {"low", "medium", "high", "xhigh"}
98+
_MAX_WRAPPER_RECOVERY_ATTEMPTS = 2
9899

99100

100101
def _model_supports_xhigh_reasoning(model: str) -> bool:
@@ -296,6 +297,21 @@ def _command_router_recovery_guidance() -> str:
296297
)
297298

298299

300+
def _command_router_hard_recovery_guidance() -> str:
    """Return the escalated ("hard") shell-wrapper recovery guidance text.

    Looks up the "Hard Recovery Guidance" H2 section of the command-router
    policy file via ``_load_markdown_h2_section``. When that lookup yields a
    truthy value it is returned as-is; otherwise the built-in fallback text
    defined below is returned.
    """
    policy_section = _load_markdown_h2_section(
        _COMMAND_ROUTER_POLICY_PATH,
        "Hard Recovery Guidance",
    )
    if not policy_section:
        # Built-in fallback used whenever the policy lookup comes back empty.
        builtin_fallback = (
            "Command-router escalation: the previous retry still attempted disallowed shell wrappers.\n"
            "Do not invoke `bash`, `/bin/bash`, `sh`, `cmd`, `powershell`, `powershell.exe`, `pwsh`, "
            "or `pwsh.exe` as the command itself on this attempt.\n"
            "Your first command invocation on this retry must be one of the direct replacements listed "
            "below, with no wrapper shell around it.\n"
            "After you re-establish repo context, continue using ordinary shell commands directly "
            "without wrapper shells."
        )
        return builtin_fallback
    return policy_section
313+
314+
299315
def _command_router_rejection_detail_intro() -> str:
300316
guidance = _load_markdown_h2_section(_COMMAND_ROUTER_POLICY_PATH, "Rejection Detail")
301317
if guidance:
@@ -1066,7 +1082,7 @@ def _unwrap_shell_wrapper_command(command: str) -> str:
10661082
return ""
10671083

10681084

1069-
def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
1085+
def _build_wrapper_direct_replacements(rejected_commands: List[str]) -> List[str]:
10701086
direct_equivalents: List[str] = []
10711087
seen: set[str] = set()
10721088
for command in rejected_commands:
@@ -1076,7 +1092,16 @@ def _build_wrapper_recovery_guidance(rejected_commands: List[str]) -> str:
10761092
continue
10771093
seen.add(lowered)
10781094
direct_equivalents.append(f"- `{command}` -> `{direct}`")
1079-
guidance_lines = [_command_router_recovery_guidance()]
1095+
return direct_equivalents
1096+
1097+
1098+
def _build_wrapper_recovery_guidance(rejected_commands: List[str], *, hard: bool = False) -> str:
1099+
guidance_lines = [
1100+
_command_router_hard_recovery_guidance()
1101+
if hard
1102+
else _command_router_recovery_guidance()
1103+
]
1104+
direct_equivalents = _build_wrapper_direct_replacements(rejected_commands)
10801105
if direct_equivalents:
10811106
guidance_lines.append("Use these direct replacements for the rejected commands:")
10821107
guidance_lines.extend(direct_equivalents[:6])
@@ -1515,11 +1540,20 @@ def _drain_stderr() -> None:
15151540
log_git_status(repo, log)
15161541

15171542
if command_policy_rejection_loop:
1518-
if wrapper_recovery_attempt < 1:
1519-
recovery_guidance = _build_wrapper_recovery_guidance(rejected_shell_wrappers)
1543+
if wrapper_recovery_attempt < _MAX_WRAPPER_RECOVERY_ATTEMPTS:
1544+
hard_recovery = wrapper_recovery_attempt >= 1
1545+
recovery_guidance = _build_wrapper_recovery_guidance(
1546+
rejected_shell_wrappers,
1547+
hard=hard_recovery,
1548+
)
15201549
if recovery_guidance:
15211550
log.warning(
1522-
"Codex hit a shell-wrapper rejection loop; retrying once with direct-command recovery guidance."
1551+
"Codex hit a shell-wrapper rejection loop; retrying once with "
1552+
+ (
1553+
"strict no-wrapper recovery guidance."
1554+
if hard_recovery
1555+
else "direct-command recovery guidance."
1556+
)
15231557
)
15241558
retry_result = _run_codex_task(
15251559
repo,
@@ -1529,19 +1563,19 @@ def _drain_stderr() -> None:
15291563
baseline_changes=baseline_snapshot,
15301564
)
15311565
retry_result["usage"] = _merge_usage_records(usage, retry_result.get("usage"))
1532-
if retry_result.get("ok"):
1566+
if wrapper_recovery_attempt == 0 and retry_result.get("ok"):
15331567
recovered_stdout = str(retry_result.get("stdout") or "").strip()
15341568
retry_result["stdout"] = _truncate(
15351569
(
1536-
"Recovered after the first Codex attempt hit command-router shell-wrapper rejections.\n\n"
1570+
"Recovered after Codex attempts hit command-router shell-wrapper rejections.\n\n"
15371571
f"{recovered_stdout}"
15381572
).strip()
15391573
)
1540-
else:
1574+
elif wrapper_recovery_attempt == 0:
15411575
retry_stderr = str(retry_result.get("stderr") or "").strip()
15421576
retry_result["stderr"] = _truncate(
15431577
(
1544-
"The first Codex attempt hit command-router shell-wrapper rejections and was retried once with direct-command recovery guidance.\n\n"
1578+
"Earlier Codex attempts hit command-router shell-wrapper rejections and were retried with stricter recovery guidance.\n\n"
15451579
f"{retry_stderr}"
15461580
).strip()
15471581
)

0 commit comments

Comments
 (0)