feat(security): make secret redaction off by default (#16794)

teknium1 · Patrick Daley · commit 3dd35a9509ef · 2026-04-28T17:03:10.000-04:00
Flips security.redact_secrets from true to false in DEFAULT_CONFIG, and
the HERMES_REDACT_SECRETS env-var fallback in agent/redact.py now
requires explicit opt-in ("1"/"true"/"yes"/"on") to enable.

New installs and users without a security.redact_secrets key get pass-
through tool output. Existing users whose config.yaml explicitly sets
redact_secrets: true keep redaction on — the config-yaml -> env-var
bridges in hermes_cli/main.py and gateway/run.py still honor their
setting.

Also updates the inline config comments, website docs, and the
hermes-agent skill so `hermes config set security.redact_secrets true`
is now the documented way to turn it on.
diff --git a/agent/redact.py b/agent/redact.py
@@ -56,8 +56,12 @@
 })
 
 # Snapshot at import time so runtime env mutations (e.g. LLM-generated
-# `export HERMES_REDACT_SECRETS=false`) cannot disable redaction mid-session.
-_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() not in ("0", "false", "no", "off")
+# `export HERMES_REDACT_SECRETS=true`) cannot enable/disable redaction
+# mid-session.  OFF by default — user must opt in via
+# `security.redact_secrets: true` in config.yaml (bridged to this env var
+# in hermes_cli/main.py and gateway/run.py) or `HERMES_REDACT_SECRETS=true`
+# in ~/.hermes/.env.
+_REDACT_ENABLED = os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("1", "true", "yes", "on")
 
 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
@@ -257,7 +261,7 @@ def redact_sensitive_text(text: str) -> str:
     """Apply all redaction patterns to a block of text.
 
     Safe to call on any string -- non-matching text passes through unchanged.
-    Disabled when security.redact_secrets is false in config.yaml.
+    Disabled by default — enable via security.redact_secrets: true in config.yaml.
     """
     if text is None:
         return None
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
@@ -942,7 +942,7 @@ def _ensure_hermes_home_managed(home: Path):
     # Pre-exec security scanning via tirith
     "security": {
         "allow_private_urls": False,  # Allow requests to private/internal IPs (for OpenWrt, proxies, VPNs)
-        "redact_secrets": True,
+        "redact_secrets": False,
         "tirith_enabled": True,
         "tirith_path": "tirith",
         "tirith_timeout": 5,
@@ -3337,14 +3337,16 @@ def load_config() -> Dict[str, Any]:
 
 _SECURITY_COMMENT = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default — tool output (terminal stdout,
+# read_file results, web content) passes through unmodified. Set
+# redact_secrets to true to mask strings that look like API keys, tokens,
+# and passwords before they enter the model context and logs.
 # tirith pre-exec scanning is enabled by default when the tirith binary
 # is available. Configure via security.tirith_* keys or env vars
 # (TIRITH_ENABLED, TIRITH_BIN, TIRITH_TIMEOUT, TIRITH_FAIL_OPEN).
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true
 #   tirith_enabled: true
 #   tirith_path: "tirith"
 #   tirith_timeout: 5
@@ -3377,11 +3379,11 @@ def load_config() -> Dict[str, Any]:
 
 _COMMENTED_SECTIONS = """
 # ── Security ──────────────────────────────────────────────────────────
-# API keys, tokens, and passwords are redacted from tool output by default.
-# Set to false to see full values (useful for debugging auth issues).
+# Secret redaction is OFF by default. Set to true to mask strings that
+# look like API keys, tokens, and passwords in tool output and logs.
 #
 # security:
-#   redact_secrets: false
+#   redact_secrets: true
 
 # ── Fallback Model ────────────────────────────────────────────────────
 # Automatic provider failover when primary is unavailable.
diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md
@@ -408,17 +408,17 @@ Common "why is Hermes doing X to my output / tool calls / commands?" toggles —
 
 ### Secret redaction in tool output
 
-Hermes auto-redacts strings that look like API keys, tokens, and secrets in all tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) so the model never sees raw credentials. If the user is intentionally working with mock tokens, share-management tokens, or their own secrets and the redaction is getting in the way:
+Secret redaction is **off by default** — tool output (terminal stdout, `read_file`, web content, subagent summaries, etc.) passes through unmodified. If the user wants Hermes to auto-mask strings that look like API keys, tokens, and secrets before they enter the conversation context and logs:
 
 ```bash
-hermes config set security.redact_secrets false      # disable globally
+hermes config set security.redact_secrets true       # enable globally
 ```
 
-**Restart required.** `security.redact_secrets` is snapshotted at import time — setting it mid-session (e.g. via `export HERMES_REDACT_SECRETS=false` from a tool call) will NOT take effect for the running process. Tell the user to run `hermes config set security.redact_secrets false` in a terminal, then start a new session. This is deliberate — it prevents an LLM from turning off redaction on itself mid-task.
+**Restart required.** `security.redact_secrets` is snapshotted at import time — toggling it mid-session (e.g. via `export HERMES_REDACT_SECRETS=true` from a tool call) will NOT take effect for the running process. Tell the user to run `hermes config set security.redact_secrets true` in a terminal, then start a new session. This is deliberate — it prevents an LLM from flipping the toggle on itself mid-task.
 
-Re-enable with:
+Disable again with:
 ```bash
-hermes config set security.redact_secrets true
+hermes config set security.redact_secrets false
 ```
 
 ### PII redaction in gateway messages
diff --git a/tests/hermes_cli/test_redact_config_bridge.py b/tests/hermes_cli/test_redact_config_bridge.py
@@ -72,8 +72,12 @@ def test_redact_secrets_false_in_config_yaml_is_honored(tmp_path):
     assert "ENV_VAR=false" in result.stdout
 
 
-def test_redact_secrets_default_true_when_unset(tmp_path):
-    """Without the config key, redaction stays on by default."""
+def test_redact_secrets_default_false_when_unset(tmp_path):
+    """Without the config key, redaction stays OFF by default.
+
+    Secret redaction is opt-in — users who want it must set
+    `security.redact_secrets: true` explicitly (or HERMES_REDACT_SECRETS=true).
+    """
     hermes_home = tmp_path / ".hermes"
     hermes_home.mkdir()
     (hermes_home / "config.yaml").write_text("{}\n")  # empty config
@@ -103,7 +107,53 @@ def test_redact_secrets_default_true_when_unset(tmp_path):
         timeout=30,
     )
     assert result.returncode == 0, f"probe failed: {result.stderr}"
-    assert "REDACT_ENABLED=True" in result.stdout
+    assert "REDACT_ENABLED=False" in result.stdout
+
+
+def test_redact_secrets_true_in_config_yaml_is_honored(tmp_path):
+    """Setting `security.redact_secrets: true` in config.yaml must enable
+    redaction — even though it's set in YAML, not as an env var."""
+    hermes_home = tmp_path / ".hermes"
+    hermes_home.mkdir()
+    (hermes_home / "config.yaml").write_text(
+        textwrap.dedent(
+            """\
+            security:
+              redact_secrets: true
+            """
+        )
+    )
+    (hermes_home / ".env").write_text("")
+
+    probe = textwrap.dedent(
+        """\
+        import sys, os
+        os.environ.pop("HERMES_REDACT_SECRETS", None)
+        sys.path.insert(0, %r)
+        import hermes_cli.main
+        import agent.redact
+        print(f"REDACT_ENABLED={agent.redact._REDACT_ENABLED}")
+        print(f"ENV_VAR={os.environ.get('HERMES_REDACT_SECRETS', '<unset>')}")
+        """
+    ) % str(REPO_ROOT)
+
+    env = dict(os.environ)
+    env["HERMES_HOME"] = str(hermes_home)
+    env.pop("HERMES_REDACT_SECRETS", None)
+
+    result = subprocess.run(
+        [sys.executable, "-c", probe],
+        env=env,
+        capture_output=True,
+        text=True,
+        cwd=str(REPO_ROOT),
+        timeout=30,
+    )
+    assert result.returncode == 0, f"probe failed: {result.stderr}"
+    assert "REDACT_ENABLED=True" in result.stdout, (
+        f"Config toggle not honored.\nstdout: {result.stdout}\nstderr: {result.stderr}"
+    )
+    assert "ENV_VAR=true" in result.stdout
 
 
 def test_dotenv_redact_secrets_beats_config_yaml(tmp_path):
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md
@@ -1302,7 +1302,7 @@ Pre-execution security scanning and secret redaction:
 
 ```yaml
 security:
-  redact_secrets: true           # Redact API key patterns in tool output and logs
+  redact_secrets: false          # Redact API key patterns in tool output and logs (off by default)
   tirith_enabled: true           # Enable Tirith security scanning for terminal commands
   tirith_path: "tirith"          # Path to tirith binary (default: "tirith" in $PATH)
   tirith_timeout: 5              # Seconds to wait for tirith scan before timing out
@@ -1313,7 +1313,7 @@ security:
     shared_files: []
 ```
 
-- `redact_secrets` — automatically detects and redacts patterns that look like API keys, tokens, and passwords in tool output before it enters the conversation context and logs.
+- `redact_secrets` — when `true`, automatically detects and redacts patterns that look like API keys, tokens, and passwords in tool output before it enters the conversation context and logs. **Off by default** — set it to `true` explicitly if you commonly work with real credentials in tool output and want a safety net.
 - `tirith_enabled` — when `true`, terminal commands are scanned by [Tirith](https://github.com/StackGuardian/tirith) before execution to detect potentially dangerous operations.
 - `tirith_path` — path to the tirith binary. Set this if tirith is installed in a non-standard location.
 - `tirith_timeout` — maximum seconds to wait for a tirith scan. Commands proceed if the scan times out.