fix: allow_patterns take priority over deny_patterns in ExecTool (HKUDS#3594)

chengyongru · Re-bin · cursoragent · web-flow · commit 5853d5dfda8a · 2026-05-03T00:27:17.000+08:00
* fix: allow_patterns take priority over deny_patterns in ExecTool

Previously deny_patterns were checked first with no bypass, meaning
allow_patterns could never exempt commands from the built-in deny list.
This made it impossible to whitelist destructive commands for specific
directories (e.g. build/cleanup tasks).

Changes:
- shell.py: check allow_patterns first; if matched, skip deny check
- shell.py: deny_patterns now appends to built-in list (not replaces)
- schema.py: add allow_patterns/deny_patterns to ExecToolConfig
- loop.py/subagent.py: pass allow_patterns/deny_patterns to ExecTool
- Add test_exec_allow_patterns.py covering priority semantics

* fix: separate deny pattern errors from workspace violation detection

The deny pattern error message "Command blocked by safety guard" was
included in _WORKSPACE_BLOCK_MARKERS, causing deny_pattern blocks to be
misclassified as fatal workspace violations. This meant LLMs had no
chance to retry with a different command — the turn was aborted
immediately.

Changes:
- shell.py: deny/allowlist error messages now use distinct phrasing
  ("blocked by deny pattern filter" / "blocked by allowlist filter")
- runner.py: remove "blocked by safety guard" from
  _WORKSPACE_BLOCK_MARKERS so deny_pattern errors are treated as normal
  tool errors (LLM can retry) instead of fatal violations
- workspace path errors still use "blocked by safety guard" and remain
  fatal as intended

* fix: update test assertions to match new deny pattern error message

* fix: indentation error in test file

* fix: restore SSRF fatal classification and tidy exec pattern plumbing

Address review feedback on the deny/allow_patterns rework:

- runner.py: re-add "internal/private url detected" to
  _WORKSPACE_BLOCK_MARKERS. The earlier marker removal also stripped
  fatal classification from SSRF / internal-URL rejections (whose
  message still says "blocked by safety guard"), turning a hard
  security boundary into something the LLM could retry.
- loop.py / subagent.py: drop `or None` between ExecToolConfig and
  ExecTool. The schema default is an empty list and ExecTool already
  normalizes None back to [], so the indirection was a no-op.
- shell.py: extract `explicitly_allowed` flag in _guard_command so
  allow_patterns are scanned once instead of twice and the control
  flow no longer relies on a no-op `pass + else` branch.
- tests/agent/test_runner.py: add a regression test asserting that
  the SSRF block message is treated as fatal, while deny/allowlist
  filter messages are deliberately non-fatal.

* fix: remove unused exec allow-pattern test import

Keep the new ExecTool allow-pattern coverage clean under ruff.

Co-authored-by: Cursor &lt;cursoragent@cursor.com&gt;

---------

Co-authored-by: Xubin Ren &lt;xubinrencs@gmail.com&gt;
Co-authored-by: Cursor &lt;cursoragent@cursor.com&gt;
diff --git a/nanobot/agent/loop.py b/nanobot/agent/loop.py
@@ -374,6 +374,8 @@ def _register_default_tools(self) -> None:
                     sandbox=self.exec_config.sandbox,
                     path_append=self.exec_config.path_append,
                     allowed_env_keys=self.exec_config.allowed_env_keys,
+                    allow_patterns=self.exec_config.allow_patterns,
+                    deny_patterns=self.exec_config.deny_patterns,
                 )
             )
         if self.web_config.enable:
diff --git a/nanobot/agent/runner.py b/nanobot/agent/runner.py
@@ -833,13 +833,13 @@ async def _run_tool(
 
     # Markers identifying tool results that represent a workspace / safety boundary rejection.
     _WORKSPACE_BLOCK_MARKERS: tuple[str, ...] = (
-        "blocked by safety guard",
         "outside the configured workspace",
         "outside allowed directory",
         "working_dir is outside",
         "working_dir could not be resolved",
         "path traversal detected",
         "path outside working dir",
+        "internal/private url detected",
     )
 
     @classmethod
diff --git a/nanobot/agent/subagent.py b/nanobot/agent/subagent.py
@@ -188,6 +188,8 @@ async def _on_checkpoint(payload: dict) -> None:
                     sandbox=self.exec_config.sandbox,
                     path_append=self.exec_config.path_append,
                     allowed_env_keys=self.exec_config.allowed_env_keys,
+                    allow_patterns=self.exec_config.allow_patterns,
+                    deny_patterns=self.exec_config.deny_patterns,
                 ))
             if self.web_config.enable:
                 tools.register(
diff --git a/nanobot/agent/tools/shell.py b/nanobot/agent/tools/shell.py
@@ -52,7 +52,7 @@ def __init__(
         self.timeout = timeout
         self.working_dir = working_dir
         self.sandbox = sandbox
-        self.deny_patterns = deny_patterns or [
+        self.deny_patterns = (deny_patterns or []) + [
             r"\brm\s+-[rf]{1,2}\b",          # rm -r, rm -rf, rm -fr
             r"\bdel\s+/[fq]\b",              # del /f, del /q
             r"\brmdir\s+/s\b",               # rmdir /s
@@ -273,13 +273,19 @@ def _guard_command(self, command: str, cwd: str) -> str | None:
         cmd = command.strip()
         lower = cmd.lower()
 
-        for pattern in self.deny_patterns:
-            if re.search(pattern, lower):
-                return "Error: Command blocked by safety guard (dangerous pattern detected)"
+        # allow_patterns take priority over deny_patterns so that users can
+        # exempt specific commands (e.g. "rm -rf" inside a build directory)
+        # from the hardcoded deny list via configuration.
+        explicitly_allowed = bool(self.allow_patterns) and any(
+            re.search(p, lower) for p in self.allow_patterns
+        )
+        if not explicitly_allowed:
+            for pattern in self.deny_patterns:
+                if re.search(pattern, lower):
+                    return "Error: Command blocked by deny pattern filter"
 
-        if self.allow_patterns:
-            if not any(re.search(p, lower) for p in self.allow_patterns):
-                return "Error: Command blocked by safety guard (not in allowlist)"
+            if self.allow_patterns:
+                return "Error: Command blocked by allowlist filter (not in allowlist)"
 
         from nanobot.security.network import contains_internal_url
         if contains_internal_url(cmd):
diff --git a/nanobot/config/schema.py b/nanobot/config/schema.py
@@ -223,6 +223,8 @@ class ExecToolConfig(Base):
     path_append: str = ""
     sandbox: str = ""  # sandbox backend: "" (none) or "bwrap"
     allowed_env_keys: list[str] = Field(default_factory=list)  # Env var names to pass through to subprocess (e.g. ["GOPATH", "JAVA_HOME"])
+    allow_patterns: list[str] = Field(default_factory=list)  # Regex patterns that bypass deny_patterns (e.g. [r"rm\s+-rf\s+/tmp/"])
+    deny_patterns: list[str] = Field(default_factory=list)  # Extra regex patterns to block (appended to built-in list)
 
 class MCPServerConfig(Base):
     """MCP server connection configuration (stdio or HTTP)."""
diff --git a/tests/agent/test_runner.py b/tests/agent/test_runner.py
@@ -352,6 +352,27 @@ async def test_runner_stops_on_workspace_violation_without_fail_on_tool_error():
     ]
 
 
+def test_is_workspace_violation_recognizes_ssrf_block():
+    """Internal/private URL block must be classified as a fatal workspace violation.
+
+    Regression guard: the deny/allowlist filter messages were intentionally split
+    out of `_WORKSPACE_BLOCK_MARKERS` so the LLM can retry, but SSRF rejections
+    are a hard security boundary and must remain fatal.
+    """
+    from nanobot.agent.runner import AgentRunner
+
+    ssrf_msg = "Error: Command blocked by safety guard (internal/private URL detected)"
+    assert AgentRunner._is_workspace_violation(ssrf_msg) is True
+
+    # Sanity: deny/allowlist filter messages are deliberately *not* fatal.
+    assert AgentRunner._is_workspace_violation(
+        "Error: Command blocked by deny pattern filter"
+    ) is False
+    assert AgentRunner._is_workspace_violation(
+        "Error: Command blocked by allowlist filter (not in allowlist)"
+    ) is False
+
+
 @pytest.mark.asyncio
 async def test_runner_persists_large_tool_results_for_follow_up_calls(tmp_path):
     from nanobot.agent.runner import AgentRunSpec, AgentRunner
diff --git a/tests/tools/test_exec_allow_patterns.py b/tests/tools/test_exec_allow_patterns.py
@@ -0,0 +1,58 @@
+"""Tests for allow_patterns priority over deny_patterns."""
+
+from __future__ import annotations
+
+from nanobot.agent.tools.shell import ExecTool
+
+
+def test_deny_patterns_block_rm_rf():
+    """Baseline: rm -rf is blocked by default deny list."""
+    tool = ExecTool()
+    result = tool._guard_command("rm -rf /tmp/build", "/tmp")
+    assert result is not None
+    assert "deny pattern filter" in result.lower()
+
+
+def test_allow_patterns_bypass_deny():
+    """allow_patterns take priority: matching command skips deny check."""
+    tool = ExecTool(allow_patterns=[r"rm\s+-rf\s+/tmp/"])
+    result = tool._guard_command("rm -rf /tmp/build", "/tmp")
+    assert result is None
+
+
+def test_allow_patterns_must_match_to_bypass():
+    """Non-matching allow_patterns do NOT bypass deny."""
+    tool = ExecTool(allow_patterns=[r"rm\s+-rf\s+/opt/"])
+    result = tool._guard_command("rm -rf /tmp/build", "/tmp")
+    assert result is not None
+    assert "deny pattern filter" in result.lower()
+
+
+def test_extra_deny_patterns_from_config():
+    """User-supplied deny patterns are appended to built-in list."""
+    tool = ExecTool(deny_patterns=[r"\bping\b"])
+    # ping is blocked by extra deny
+    assert tool._guard_command("ping example.com", "/tmp") is not None
+    # rm -rf still blocked by built-in deny
+    assert tool._guard_command("rm -rf /tmp/x", "/tmp") is not None
+
+
+def test_allow_patterns_bypass_extra_deny():
+    """allow_patterns also bypasses user-supplied deny patterns."""
+    tool = ExecTool(
+        deny_patterns=[r"\bping\b"],
+        allow_patterns=[r"\bping\s+example\.com\b"],
+    )
+    result = tool._guard_command("ping example.com", "/tmp")
+    assert result is None
+
+
+def test_allow_patterns_is_whitelist_only():
+    """When allow_patterns is set, non-matching non-denied commands are blocked."""
+    tool = ExecTool(allow_patterns=[r"\becho\b"])
+    # echo matches allow → ok
+    assert tool._guard_command("echo hello", "/tmp") is None
+    # ls does not match allow and is not in deny → blocked by allowlist
+    result = tool._guard_command("ls /tmp", "/tmp")
+    assert result is not None
+    assert "allowlist" in result.lower()
diff --git a/tests/tools/test_exec_security.py b/tests/tools/test_exec_security.py
@@ -94,7 +94,7 @@ def test_exec_blocks_writes_to_history_jsonl(command):
     tool = ExecTool()
     result = tool._guard_command(command, "/tmp")
     assert result is not None
-    assert "dangerous pattern" in result.lower()
+    assert "deny pattern filter" in result.lower()
 
 
 @pytest.mark.parametrize(

Original file line number	Diff line number	Diff line change
`@@ -374,6 +374,8 @@ def _register_default_tools(self) -> None:`
`374`	`374`	`sandbox=self.exec_config.sandbox,`
`375`	`375`	`path_append=self.exec_config.path_append,`
`376`	`376`	`allowed_env_keys=self.exec_config.allowed_env_keys,`
	`377`	`+ allow_patterns=self.exec_config.allow_patterns,`
	`378`	`+ deny_patterns=self.exec_config.deny_patterns,`
`377`	`379`	`)`
`378`	`380`	`)`
`379`	`381`	`if self.web_config.enable:`