Skip to content

Commit 68d130c

Browse files
fix(sandbox): PR #12655 simplification — correctness fixes + test consolidation
Phase 1 (correctness): replace hand-rolled equality helpers with pydantic-canonicalized `_normalize_section` (via `model_dump`); future field additions to `InternetAccessConfig` / `PythonDependenciesConfig` now track automatically. `_env_vars_equal` switches from `frozenset` to `Counter` so duplicate multiplicity is preserved. Extend runtime `_resolve_user_env` to reject reserved `EnvVarLiteral.name` symmetrically with the existing `EnvVarSecretRef.secret_key` check, closing the pre-mutation-guard-row exposure D7 was meant to close. Phase 2 (test consolidation): drop three integration-layer duplicate files (`test_env_var_end_to_end.py`, `test_credential_resolution_integration.py`, `test_reserved_credential_names.py`) after folding unique scenarios into their unit-level counterparts; prune pydantic-framework-rechecking tests (`test_unknown_keys_preserved`, `test_empty_config_returns_defaults`, trivial round-trips) from `test_sandbox_config_validation.py`; drop `backend._user_env` internal-attribute assertions from `test_user_env_forwarding.py`; replace `_BACKEND_CACHE` dict-shape peeks in `test_sandbox_cache_invalidation.py` with identity-based observable checks; parametrize per-adapter shape assertions in `test_capability_advertisement.py` over `SANDBOX_ADAPTER_METADATA.keys()`; add module-level docstrings to `test_daytona_backend.py` / `test_vercel_backend.py` / `test_modal_backend.py` stating the per-adapter-files-only-for-unique-lifecycle principle; move `test_sandbox_config_validation.py` from `tests/unit/server/api/` to `tests/unit/server/sandbox/` (tests sandbox pydantic models, not API). Phase 3 (clarity): rename `_PHOENIX_SANDBOX_FALLBACK_CREDENTIAL_KEYS` → `_PHOENIX_RESERVED_CREDENTIAL_ONLY_KEYS` with an inline docstring distinguishing it from `SandboxAdapter.credential_specs` and `RESERVED_CREDENTIAL_NAMES`. Net: +599 / -718. All 429 sandbox-relevant tests pass.
1 parent e78ce9f commit 68d130c

14 files changed

Lines changed: 599 additions & 718 deletions

src/phoenix/server/sandbox/__init__.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -387,10 +387,15 @@ async def _resolve_user_env(
387387

388388
ta: TypeAdapter[EnvVarEntry] = TypeAdapter(EnvVarEntry)
389389
entries: list[EnvVarEntry] = [ta.validate_python(e) for e in raw_env_vars]
390-
# Fail-closed: reject reserved secret_key values before any DB lookup so
391-
# rows persisted before the mutation-layer guard shipped cannot be silently
392-
# resolved. This mirrors the mutation-layer check in _check_env_var_collision.
390+
# Fail-closed: reject reserved names before any DB lookup so rows persisted
391+
# before the mutation-layer guard shipped cannot be silently resolved.
392+
# Mirrors _check_env_var_collision which checks both literal name and secret_key.
393393
for entry in entries:
394+
if isinstance(entry, EnvVarLiteral) and is_reserved_credential_name(entry.name):
395+
raise MissingSecretError(
396+
f"env_var name {entry.name!r} is a reserved sandbox provider "
397+
"credential and cannot be used as a user-defined environment variable."
398+
)
394399
if isinstance(entry, EnvVarSecretRef) and is_reserved_credential_name(entry.secret_key):
395400
raise MissingSecretError(
396401
f"secret_ref.secret_key {entry.secret_key!r} is a reserved sandbox "
@@ -624,8 +629,11 @@ async def get_or_create_backend(
624629
# e2b adapter not registered) cannot narrow the reserved set.
625630
# ---------------------------------------------------------------------------
626631

627-
_PHOENIX_SANDBOX_FALLBACK_CREDENTIAL_KEYS: frozenset[str] = frozenset(
632+
_PHOENIX_RESERVED_CREDENTIAL_ONLY_KEYS: frozenset[str] = frozenset(
628633
{
634+
# Reservation-only names: NOT settable via setSandboxCredential mutation.
635+
# Contrast with SandboxAdapter.credential_specs (adapter-declared, settable
636+
# via mutation). RESERVED_CREDENTIAL_NAMES is the derived union of both.
629637
"PHOENIX_SANDBOX_TOKEN",
630638
"PHOENIX_SANDBOX_API_KEY",
631639
# Modal tokens — added here so dropping credential_specs from ModalAdapter
@@ -637,7 +645,7 @@ async def get_or_create_backend(
637645

638646

639647
def _build_reserved_credential_names() -> frozenset[str]:
640-
names: set[str] = {key.lower() for key in _PHOENIX_SANDBOX_FALLBACK_CREDENTIAL_KEYS}
648+
names: set[str] = {key.lower() for key in _PHOENIX_RESERVED_CREDENTIAL_ONLY_KEYS}
641649
for adapter in _SANDBOX_ADAPTERS.values():
642650
for spec in adapter.credential_specs:
643651
names.add(spec.key.lower())

src/phoenix/server/sandbox/types.py

Lines changed: 63 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -323,50 +323,90 @@ async def stop_session(self, session_key: str) -> None:
323323
# ---------------------------------------------------------------------------
324324

325325

326-
def _env_var_key(entry: Any) -> tuple[str, str, str, str]:
327-
"""Stable sort/hash key for a single env_var entry dict."""
326+
def _normalize_env_var(entry: Any) -> dict[str, Any]:
327+
"""Return a stable dict representation of a single env_var entry."""
328328
if isinstance(entry, dict):
329-
return (
330-
entry.get("kind", ""),
331-
entry.get("name", ""),
332-
entry.get("value", ""),
333-
entry.get("secret_key", ""),
334-
)
335-
return (
336-
getattr(entry, "kind", ""),
337-
getattr(entry, "name", ""),
338-
getattr(entry, "value", ""),
339-
getattr(entry, "secret_key", ""),
340-
)
329+
return dict(entry)
330+
# pydantic model instance — use model_dump for canonical representation
331+
if hasattr(entry, "model_dump"):
332+
result: dict[str, Any] = entry.model_dump(mode="json")
333+
return result
334+
return {
335+
"kind": getattr(entry, "kind", ""),
336+
"name": getattr(entry, "name", ""),
337+
"value": getattr(entry, "value", ""),
338+
"secret_key": getattr(entry, "secret_key", ""),
339+
}
341340

342341

343342
def _env_vars_equal(a: Any, b: Any) -> bool:
344-
"""Return True if two env_vars lists are semantically equal (order-independent)."""
343+
"""Return True if two env_vars lists are semantically equal (order-independent).
344+
345+
Uses Counter over canonical tuple representations so duplicate entries in
346+
one list are not collapsed — [X, X] != [X].
347+
"""
348+
from collections import Counter
349+
345350
if not a and not b:
346351
return True
347352
if not a or not b:
348353
return False
349-
return frozenset(_env_var_key(e) for e in a) == frozenset(_env_var_key(e) for e in b)
354+
355+
def _to_tuple(entry: Any) -> tuple[str, ...]:
356+
d = _normalize_env_var(entry)
357+
return (d.get("kind", ""), d.get("name", ""), d.get("value", ""), d.get("secret_key", ""))
358+
359+
return Counter(_to_tuple(e) for e in a) == Counter(_to_tuple(e) for e in b)
360+
361+
362+
def _normalize_section(value: Any, model_cls: Type[BaseModel]) -> dict[str, Any]:
363+
"""Normalize a config section through pydantic model_dump so comparisons track the schema."""
364+
if value is None:
365+
return {}
366+
if isinstance(value, dict):
367+
dumped: dict[str, Any] = model_cls.model_validate(value).model_dump(
368+
mode="json", exclude_defaults=False
369+
)
370+
return dumped
371+
if hasattr(value, "model_dump"):
372+
dumped = value.model_dump(mode="json", exclude_defaults=False)
373+
return dumped
374+
return {}
350375

351376

352377
def _internet_access_equal(a: Any, b: Any) -> bool:
353-
"""Return True if two internet_access values are semantically equal."""
378+
"""Return True if two internet_access values are semantically equal.
379+
380+
Canonicalizes through InternetAccessConfig.model_dump so future fields
381+
are automatically included rather than silently dropped.
382+
"""
354383
if a is None and b is None:
355384
return True
356385
if a is None or b is None:
357386
return False
358-
mode_a = a.get("mode") if isinstance(a, dict) else getattr(a, "mode", None)
359-
mode_b = b.get("mode") if isinstance(b, dict) else getattr(b, "mode", None)
360-
return mode_a == mode_b
387+
return _normalize_section(a, InternetAccessConfig) == _normalize_section(
388+
b, InternetAccessConfig
389+
)
361390

362391

363392
def _packages_equal(a: Any, b: Any) -> bool:
364-
"""Return True if two packages lists are semantically equal (set comparison)."""
393+
"""Return True if two dependencies sections are semantically equal.
394+
395+
Canonicalizes through PythonDependenciesConfig.model_dump so the lockfile
396+
field is included — set(packages) alone is insufficient. Package list order
397+
is not semantically meaningful, so packages are sorted before comparison.
398+
"""
365399
if not a and not b:
366400
return True
367401
if not a or not b:
368402
return False
369-
return set(a) == set(b)
403+
404+
def _canonical(value: Any) -> dict[str, Any]:
405+
d = _normalize_section(value, PythonDependenciesConfig)
406+
d["packages"] = sorted(d.get("packages") or [])
407+
return d
408+
409+
return _canonical(a) == _canonical(b)
370410

371411

372412
class SandboxAdapter(ABC):
@@ -580,8 +620,7 @@ def _enforce_capability_gates(
580620
packages = dependencies.get("packages") if isinstance(dependencies, dict) else None
581621
if packages:
582622
stored_deps = stored_config.get("dependencies") if stored_config else None
583-
stored_pkgs = stored_deps.get("packages") if isinstance(stored_deps, dict) else None
584-
if not _packages_equal(packages, stored_pkgs):
623+
if not _packages_equal(dependencies, stored_deps):
585624
errors.append(
586625
InitErrorDetails(
587626
type=PydanticCustomError(

tests/unit/server/api/mutations/test_sandbox_cache_invalidation.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -198,10 +198,6 @@ async def test_rotate_vercel_token_evicts_both_vercel_backends(
198198
assert py_backend is not None
199199
assert ts_backend is not None
200200

201-
# Confirm BOTH caches are populated pre-rotation.
202-
assert any(k[0] == py_adapter.key for k in _BACKEND_CACHE)
203-
assert any(k[0] == ts_adapter.key for k in _BACKEND_CACHE)
204-
205201
# Rotate via the PY backend_type only. The key-level fan-out
206202
# (invalidate_backend_cache_for_key) must evict both because
207203
# both adapters list `shared_spec_key` in credential_specs.
@@ -216,12 +212,19 @@ async def test_rotate_vercel_token_evicts_both_vercel_backends(
216212
)
217213
assert not result.errors, result.errors
218214

219-
# Both cache namespaces must be empty.
220-
assert not any(k[0] == py_adapter.key for k in _BACKEND_CACHE), (
221-
"PY cache entries survived rotation"
215+
# Both caches must have been evicted: next call returns a new instance.
216+
async with db() as session:
217+
py_backend_v2 = await get_or_create_backend(
218+
py_adapter.key, config={}, session=session, decrypt=enc.decrypt
219+
)
220+
ts_backend_v2 = await get_or_create_backend(
221+
ts_adapter.key, config={}, session=session, decrypt=enc.decrypt
222+
)
223+
assert py_backend_v2 is not py_backend, (
224+
"PY cache was not evicted: same instance returned after rotation"
222225
)
223-
assert not any(k[0] == ts_adapter.key for k in _BACKEND_CACHE), (
224-
"TS cache entries survived rotation — shared-spec fan-out failed"
226+
assert ts_backend_v2 is not ts_backend, (
227+
"TS cache was not evicted — shared-spec fan-out failed"
225228
)
226229
finally:
227230
_purge_cache_for([py_adapter.key, ts_adapter.key])

tests/unit/server/api/test_capability_advertisement.py

Lines changed: 5 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -69,21 +69,12 @@ async def test_sandbox_backends_full_ui_query_shape(
6969
assert response.data is not None
7070
backends = {b["backendType"]: b for b in response.data["sandboxBackends"]}
7171

72-
assert set(backends.keys()) == {
73-
"WASM",
74-
"E2B",
75-
"DAYTONA_PYTHON",
76-
"VERCEL_PYTHON",
77-
"VERCEL_TYPESCRIPT",
78-
"DENO",
79-
"MODAL",
80-
}
72+
assert set(backends.keys()) == set(SANDBOX_ADAPTER_METADATA.keys())
8173

8274
for bt, backend in backends.items():
8375
assert "supportsEnvVars" in backend, bt
8476
assert "internetAccess" in backend, bt
8577
assert "dependenciesLanguage" in backend, bt
86-
assert backend["internetAccess"] == "NONE", bt
8778

8879

8980
@pytest.mark.parametrize("backend_type", list(SANDBOX_ADAPTER_METADATA.keys()))
@@ -213,17 +204,8 @@ async def test_dependencies_language_only_set_for_daytona(
213204
assert response.data is not None
214205
backends = {b["backendType"]: b for b in response.data["sandboxBackends"]}
215206

216-
assert backends["DAYTONA_PYTHON"]["dependenciesLanguage"] == "PYTHON"
217-
218-
no_deps_backends = [
219-
"WASM",
220-
"E2B",
221-
"VERCEL_PYTHON",
222-
"VERCEL_TYPESCRIPT",
223-
"DENO",
224-
"MODAL",
225-
]
226-
for bt in no_deps_backends:
227-
assert backends[bt]["dependenciesLanguage"] is None, (
228-
f"{bt} unexpectedly advertises a dependenciesLanguage"
207+
for bt, meta in SANDBOX_ADAPTER_METADATA.items():
208+
expected = meta.dependencies_language
209+
assert backends[bt]["dependenciesLanguage"] == expected, (
210+
f"{bt}: expected dependenciesLanguage={expected!r}"
229211
)

0 commit comments

Comments
 (0)