Skip to content

Commit 307746e

Browse files
teknium1 authored and RationallyPrime committed
fix(browser): enforce cloud-metadata SSRF floor in hybrid routing (NousResearch#16234) (NousResearch#21228)
Cloud metadata endpoints (169.254.169.254 etc.) are now always blocked by browser_navigate regardless of hybrid routing, allow_private_urls, or backend. Bug: commit 42c076d (NousResearch#16136) added hybrid routing that flips auto_local_this_nav=True for private URLs and short-circuits _is_safe_url(). IMDS endpoints are technically private (169.254/16 link-local), so the sidecar happily routed them to a local Chromium, and the agent could read IAM credentials via browser_snapshot. On EC2/GCP/Azure this is a full SSRF-to-credential-theft. Fix: new is_always_blocked_url() in url_safety.py — a narrow floor that checks _BLOCKED_HOSTNAMES, _ALWAYS_BLOCKED_IPS, _ALWAYS_BLOCKED_NETWORKS only. Applied as an independent gate in browser_navigate's pre-nav and post-redirect checks, BEFORE auto_local_this_nav gets a chance to short-circuit. Ordinary private URLs (localhost, 192.168.x, 10.x, .local, CGNAT) still route to the local sidecar as the NousResearch#16136 feature intends. Secondary fix (reporter's finding): _url_is_private() now explicitly checks 172.16.0.0/12. ipaddress.is_private only covers that range on Python ≥3.11 (bpo-40791), so on 3.10 runtimes those URLs were routed to cloud instead of the local sidecar. No security impact — just a correctness fix for the hybrid-routing feature. Closes NousResearch#16234.
1 parent 272eb13 commit 307746e

4 files changed

Lines changed: 281 additions & 1 deletion

File tree

tests/tools/test_browser_ssrf_local.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,62 @@ def test_local_allows_public_url(self, monkeypatch, _common_patches):
106106

107107
assert result["success"] is True
108108

109+
# -- Always-blocked floor: hybrid routing bypass regression (#16234) -------
110+
111+
# Hybrid-routing feature flips auto_local_this_nav=True for private URLs,
112+
# which previously short-circuited _is_safe_url() entirely. An agent
113+
# running on EC2/GCP/Azure could navigate to 169.254.169.254 via the
114+
# spawned local Chromium sidecar and read IAM credentials via
115+
# browser_snapshot. The always-blocked floor must fire regardless of
116+
# routing.
117+
IMDS_URLS = [
118+
"http://169.254.169.254/latest/meta-data/", # AWS / GCP / Azure / DO / Oracle
119+
"http://169.254.169.253/metadata/instance", # Azure IMDS wire server
120+
"http://169.254.170.2/v2/credentials", # AWS ECS task metadata
121+
"http://100.100.100.200/latest/meta-data/", # Alibaba Cloud
122+
"http://metadata.google.internal/computeMetadata/v1/", # GCP hostname
123+
]
124+
125+
@pytest.mark.parametrize("imds_url", IMDS_URLS)
def test_cloud_blocks_imds_even_when_routing_to_local_sidecar(
    self, monkeypatch, _common_patches, imds_url
):
    """Cloud metadata URLs are rejected even on the hybrid sidecar path."""
    # Cloud backend, private URLs not allowed, and the session key is
    # reported as a local-sidecar key (hybrid routing active for this URL).
    # _is_safe_url is forced to True so that only the always-blocked floor
    # can be responsible for the rejection asserted below.
    stubs = (
        ("_is_local_backend", lambda: False),
        ("_allow_private_urls", lambda: False),
        ("_is_local_sidecar_key", lambda key: True),
        ("_is_safe_url", lambda url: True),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(browser_tool, attr_name, stub)

    outcome = json.loads(browser_tool.browser_navigate(imds_url))

    assert outcome["success"] is False
    assert "cloud metadata endpoint" in outcome["error"]
145+
146+
def test_cloud_allows_ordinary_private_url_via_sidecar(
    self, monkeypatch, _common_patches
):
    """Ordinary private URLs still navigate through the hybrid sidecar.

    The always-blocked floor has to stay narrow enough that the
    PR #16136 hybrid-routing feature keeps working.
    """
    # _is_safe_url is forced to False: with sidecar routing active, these
    # navigations are expected to succeed without consulting it.
    stubs = (
        ("_is_local_backend", lambda: False),
        ("_allow_private_urls", lambda: False),
        ("_is_local_sidecar_key", lambda key: True),
        ("_is_safe_url", lambda url: False),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(browser_tool, attr_name, stub)

    ordinary_private_urls = (
        "http://127.0.0.1:8080/dashboard",
        "http://192.168.1.1/admin",
        "http://10.0.0.5/",
        "http://myservice.local/",
    )
    for target in ordinary_private_urls:
        outcome = json.loads(browser_tool.browser_navigate(target))
        assert outcome["success"] is True, f"Unexpected block for {target}: {outcome}"
164+
109165

110166
# ---------------------------------------------------------------------------
111167
# _is_local_backend() unit tests
@@ -236,6 +292,32 @@ def test_cloud_allows_redirect_to_public(self, monkeypatch, _common_patches):
236292
assert result["success"] is True
237293
assert result["url"] == final
238294

295+
# -- Always-blocked floor: redirect to IMDS via hybrid sidecar (#16234) ----
296+
297+
def test_cloud_blocks_redirect_to_imds_even_via_sidecar(
    self, monkeypatch, _common_patches
):
    """A redirect that lands on a cloud metadata endpoint is blocked.

    This holds regardless of routing: the hybrid local sidecar path must
    not hand IMDS content back to the agent either.
    """
    imds_final = "http://169.254.169.254/latest/meta-data/"

    def _land_on_imds(*args, **kwargs):
        # Every browser command reports the IMDS URL as the final location.
        return _make_browser_result(url=imds_final)

    # _is_safe_url is forced to True so the always-blocked floor is the
    # only gate that can produce the failure asserted below.
    stubs = (
        ("_is_local_backend", lambda: False),
        ("_allow_private_urls", lambda: False),
        ("_is_local_sidecar_key", lambda key: True),
        ("_is_safe_url", lambda url: True),
        ("_run_browser_command", _land_on_imds),
    )
    for attr_name, stub in stubs:
        monkeypatch.setattr(browser_tool, attr_name, stub)

    outcome = json.loads(browser_tool.browser_navigate(self.PUBLIC_URL))

    assert outcome["success"] is False
    assert "cloud metadata endpoint" in outcome["error"]
320+
239321

240322
class TestAllowPrivateUrlsConfig:
241323
@pytest.fixture(autouse=True)

tests/tools/test_url_safety.py

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from tools.url_safety import (
77
is_safe_url,
8+
is_always_blocked_url,
89
_is_blocked_ip,
910
_global_allow_private_urls,
1011
_reset_allow_private_cache,
@@ -407,3 +408,69 @@ def test_empty_url_still_blocked_with_toggle(self, monkeypatch):
407408
"""Empty URLs are still blocked."""
408409
monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
409410
assert is_safe_url("") is False
411+
412+
413+
class TestIsAlwaysBlockedUrl:
    """Tests for the always-blocked floor (cloud metadata only).

    The floor is deliberately narrower than ``is_safe_url``.
    """

    # -- The sentinel set that must always block --------------------------------

    @pytest.mark.parametrize(
        "url",
        [
            "http://169.254.169.254/latest/meta-data/",  # AWS / GCP / Azure / DO / Oracle
            "http://169.254.169.253/metadata/instance",  # Azure IMDS wire server
            "http://169.254.170.2/v2/credentials",  # AWS ECS task metadata
            "http://100.100.100.200/latest/meta-data/",  # Alibaba Cloud
            "http://169.254.42.1/",  # Any /16 link-local
        ],
    )
    def test_literal_imds_ips_always_blocked(self, url):
        """Literal IMDS IPs and the /16 link-local range are always blocked."""
        verdict = is_always_blocked_url(url)
        assert verdict is True

    def test_gcp_metadata_hostname_always_blocked_even_without_dns(self):
        """metadata.google.internal is blocked by name; DNS is not required."""
        dns_down = socket.gaierror("nope")
        with patch("socket.getaddrinfo", side_effect=dns_down):
            verdict = is_always_blocked_url("http://metadata.google.internal/")
            assert verdict is True

    def test_hostname_resolving_to_imds_always_blocked(self):
        """A hostname whose DNS answer is an IMDS address is still blocked."""
        imds_answer = [(2, 1, 6, "", ("169.254.169.254", 0))]
        with patch("socket.getaddrinfo", return_value=imds_answer):
            verdict = is_always_blocked_url("http://attacker-controlled.example.com/")
            assert verdict is True

    # -- Things the floor must NOT block ----------------------------------------

    def test_public_url_not_blocked(self):
        """A plain public URL is outside the floor."""
        verdict = is_always_blocked_url("https://example.com/path")
        assert verdict is False

    @pytest.mark.parametrize(
        "url",
        [
            "http://127.0.0.1:8080/",
            "http://192.168.1.1/",
            "http://10.0.0.5/",
            "http://172.16.0.1/",
            "http://100.64.0.1/",  # CGNAT — blocked by is_safe_url but not by the floor
        ],
    )
    def test_ordinary_private_urls_not_in_floor(self, url):
        """Ordinary private URLs pass: the floor is narrower than is_safe_url."""
        verdict = is_always_blocked_url(url)
        assert verdict is False

    def test_dns_failure_not_in_floor(self):
        """A DNS failure on a non-sentinel hostname is not always-blocked.

        The caller's ordinary fail-closed path (is_safe_url) handles that case.
        """
        dns_down = socket.gaierror("fail")
        with patch("socket.getaddrinfo", side_effect=dns_down):
            verdict = is_always_blocked_url("http://nonexistent.example.com/")
            assert verdict is False

    def test_empty_url_not_in_floor(self):
        """An empty URL falls through; the caller decides how to treat it."""
        verdict = is_always_blocked_url("")
        assert verdict is False

    def test_malformed_url_not_in_floor(self):
        """Unparseable input does not claim always-blocked status."""
        verdict = is_always_blocked_url("not a url at all")
        assert verdict is False

    def test_floor_ignores_allow_private_urls_toggle(self, monkeypatch):
        """security.allow_private_urls can NOT unblock cloud metadata."""
        monkeypatch.setenv("HERMES_ALLOW_PRIVATE_URLS", "true")
        verdict = is_always_blocked_url("http://169.254.169.254/")
        assert verdict is True

tools/browser_tool.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,13 @@
7676
check_website_access = lambda url: None # noqa: E731 — fail-open if policy module unavailable
7777

7878
try:
79-
from tools.url_safety import is_safe_url as _is_safe_url
79+
from tools.url_safety import (
80+
is_safe_url as _is_safe_url,
81+
is_always_blocked_url as _is_always_blocked_url,
82+
)
8083
except Exception:
8184
_is_safe_url = lambda url: False # noqa: E731 — fail-closed: block all if safety module unavailable
85+
_is_always_blocked_url = lambda url: True # noqa: E731 — fail-closed on the floor too
8286
from tools.browser_providers.base import CloudBrowserProvider
8387
from tools.browser_providers.browserbase import BrowserbaseProvider
8488
from tools.browser_providers.browser_use import BrowserUseProvider
@@ -837,6 +841,10 @@ def _url_is_private(url: str) -> bool:
837841
ip.is_private
838842
or ip.is_loopback
839843
or ip.is_link_local
844+
# 172.16.0.0/12: only covered by ip.is_private on Python
845+
# ≥3.11 (bpo-40791). Explicit check keeps 3.10 runtimes
846+
# routing these to the local sidecar correctly.
847+
or ip in ipaddress.ip_network("172.16.0.0/12")
840848
or ip in ipaddress.ip_network("100.64.0.0/10")
841849
)
842850
except ValueError:
@@ -2081,6 +2089,18 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
20812089
nav_session_key = _navigation_session_key(effective_task_id, url)
20822090
auto_local_this_nav = _is_local_sidecar_key(nav_session_key)
20832091

2092+
# Always-blocked floor: cloud metadata / IMDS endpoints are denied
2093+
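# regardless of hybrid routing or allow_private_urls. NOTE(review): the guard below still exempts local backends (`not _is_local_backend()`) — confirm that exemption is intended.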
2094+
# There's no legitimate agent use case for navigating to
2095+
# 169.254.169.254 / metadata.google.internal / ECS task metadata
2096+
# via a browser, and routing those to a local Chromium sidecar
2097+
# on an EC2/GCP/Azure host exfiltrates IAM credentials (#16234).
2098+
if not _is_local_backend() and _is_always_blocked_url(url):
2099+
return json.dumps({
2100+
"success": False,
2101+
"error": "Blocked: URL targets a cloud metadata endpoint",
2102+
})
2103+
20842104
if (
20852105
not _is_local_backend()
20862106
and not auto_local_this_nav
@@ -2143,6 +2163,21 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
21432163
# Skipped for local backends (same rationale as the pre-nav check),
21442164
# and for the hybrid local sidecar (we're already on a local browser
21452165
# hitting a private URL by design).
2166+
# Always-blocked floor (cloud metadata / IMDS) is enforced even
2167+
# when auto_local_this_nav is true — see pre-nav check for
2168+
# rationale (#16234).
2169+
if (
2170+
not _is_local_backend()
2171+
and final_url
2172+
and final_url != url
2173+
and _is_always_blocked_url(final_url)
2174+
):
2175+
_run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10)
2176+
return json.dumps({
2177+
"success": False,
2178+
"error": "Blocked: redirect landed on a cloud metadata endpoint",
2179+
})
2180+
21462181
if (
21472182
not _is_local_backend()
21482183
and not auto_local_this_nav

tools/url_safety.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,102 @@ def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool:
147147
return False
148148

149149

150+
def _is_in_always_blocked_floor(
    ip: ipaddress.IPv4Address | ipaddress.IPv6Address,
) -> bool:
    """Return True when *ip* is in the always-blocked sentinel set.

    An IPv4-mapped IPv6 address (``::ffff:a.b.c.d``) is also checked in its
    embedded IPv4 form. Without that, ``::ffff:169.254.169.254`` is compared
    only as an IPv6 object and never matches the IPv4 sentinel entries, even
    though a dual-stack connect to it reaches the IPv4 metadata service.
    """
    candidates = [ip]
    # ``ipv4_mapped`` exists only on IPv6Address and is None when the
    # address is not in the ::ffff:0:0/96 mapped range.
    mapped = getattr(ip, "ipv4_mapped", None)
    if mapped is not None:
        candidates.append(mapped)
    return any(
        addr in _ALWAYS_BLOCKED_IPS
        or any(addr in net for net in _ALWAYS_BLOCKED_NETWORKS)
        for addr in candidates
    )


def is_always_blocked_url(url: str) -> bool:
    """Return True when the URL targets an always-blocked endpoint.

    This is the security floor — cloud metadata IPs / hostnames
    (169.254.169.254, metadata.google.internal, ECS task metadata, etc.)
    that have no legitimate agent use regardless of backend, routing, or
    the ``allow_private_urls`` toggle. It exists for callers that bypass
    the full ``is_safe_url`` check for their own reasons (e.g. hybrid cloud
    browser routing to a local Chromium sidecar for private URLs) and still
    need to enforce the non-negotiable floor before the request proceeds.

    Returns True (= blocked) on:
      - Hostnames in ``_BLOCKED_HOSTNAMES``
      - IPs / networks in ``_ALWAYS_BLOCKED_IPS`` / ``_ALWAYS_BLOCKED_NETWORKS``,
        including their IPv4-mapped IPv6 spellings (``::ffff:a.b.c.d``)
      - URLs whose hostname resolves to any of the above

    Returns False (= not in the always-blocked floor) on:
      - Benign public / private / loopback URLs (whether or not they'd
        be blocked by the ordinary SSRF check)
      - DNS-resolution failures for non-sentinel hostnames (the caller's
        ordinary fail-closed path handles those, if applicable)
      - Parse errors and any other unexpected exception (caller decides
        fail-open vs fail-closed)

    Intentionally narrower than ``is_safe_url``: only the sentinel set is
    blocked, not ordinary private addresses. Callers that want the full
    SSRF check should still use ``is_safe_url``.
    """
    try:
        parsed = urlparse(url)
        # Normalise case and drop a trailing root dot so "HOST." == "host".
        hostname = (parsed.hostname or "").strip().lower().rstrip(".")
        if not hostname:
            return False

        # Blocked-hostname check fires regardless of DNS resolution.
        if hostname in _BLOCKED_HOSTNAMES:
            logger.warning(
                "Blocked request to internal hostname (always-blocked floor): %s",
                hostname,
            )
            return True

        # Literal IP → check directly against the always-blocked set and
        # skip DNS entirely.
        try:
            literal_ip = ipaddress.ip_address(hostname)
        except ValueError:
            literal_ip = None

        if literal_ip is not None:
            if _is_in_always_blocked_floor(literal_ip):
                logger.warning(
                    "Blocked request to cloud metadata address "
                    "(always-blocked floor): %s",
                    hostname,
                )
                return True
            return False

        # Hostname → resolve and check every answer. DNS failure is NOT
        # always-blocked (caller's ordinary path handles that).
        try:
            addr_info = socket.getaddrinfo(
                hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM
            )
        except socket.gaierror:
            return False

        for _family, _, _, _, sockaddr in addr_info:
            ip_str = sockaddr[0]
            try:
                resolved = ipaddress.ip_address(ip_str)
            except ValueError:
                # Unparseable answer: nothing to compare, move on.
                continue
            if _is_in_always_blocked_floor(resolved):
                logger.warning(
                    "Blocked request to cloud metadata address "
                    "(always-blocked floor): %s -> %s",
                    hostname,
                    ip_str,
                )
                return True

        return False

    except Exception as exc:
        # Parse failures or unexpected errors — don't claim the URL is
        # always-blocked. Caller decides what to do with a malformed URL.
        logger.debug("is_always_blocked_url error for %s: %s", url, exc)
        return False
245+
150246
def _allows_private_ip_resolution(hostname: str, scheme: str) -> bool:
151247
"""Return True when a trusted HTTPS hostname may bypass IP-class blocking."""
152248
return scheme == "https" and hostname in _TRUSTED_PRIVATE_IP_HOSTS

0 commit comments

Comments
 (0)