langchain-ai · Mason Daugherty (mdrxy) · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025 · Dec 22, 2025
@@ -6,7 +6,7 @@
 
 if TYPE_CHECKING:
     from langchain_core.load.dump import dumpd, dumps
-    from langchain_core.load.load import loads
+    from langchain_core.load.load import InitValidator, loads
     from langchain_core.load.serializable import Serializable
 
 # Unfortunately, we have to eagerly import load from langchain_core/load/load.py
@@ -15,11 +15,19 @@
 # the `from langchain_core.load.load import load` absolute import should also work.
 from langchain_core.load.load import load
 
-__all__ = ("Serializable", "dumpd", "dumps", "load", "loads")
+__all__ = (
+    "InitValidator",
+    "Serializable",
+    "dumpd",
+    "dumps",
+    "load",
+    "loads",
+)
 
 _dynamic_imports = {
     "dumpd": "dump",
     "dumps": "dump",
+    "InitValidator": "load",
     "loads": "load",
     "Serializable": "serializable",
 }

@@ -0,0 +1,176 @@
+"""Validation utilities for LangChain serialization.
+
+Provides escape-based protection against injection attacks in serialized objects. The
+approach uses an allowlist design: only dicts explicitly produced by
+`Serializable.to_json()` are treated as LC objects during deserialization.
+
+## How escaping works
+
+During serialization, plain dicts (user data) that contain an `'lc'` key are wrapped:
+
+```python
+{"lc": 1, ...}  # user data that looks like LC object
+# becomes:
+{"__lc_escaped__": {"lc": 1, ...}}
+```
+
+During deserialization, escaped dicts are unwrapped and returned as plain dicts,
+NOT instantiated as LC objects.
+"""
+
+from typing import Any
+
+_LC_ESCAPED_KEY = "__lc_escaped__"
+"""Sentinel key used to mark escaped user dicts during serialization.
+
+When a plain dict contains 'lc' key (which could be confused with LC objects),
+we wrap it as {"__lc_escaped__": {...original...}}.
+"""
+
+
+def _needs_escaping(obj: dict[str, Any]) -> bool:
+    """Check if a dict needs escaping to prevent confusion with LC objects.
+
+    A dict needs escaping if:
+
+    1. It has an `'lc'` key (could be confused with LC serialization format)
+    2. It has only the escape key (would be mistaken for an escaped dict)
+    """
+    return "lc" in obj or (len(obj) == 1 and _LC_ESCAPED_KEY in obj)
+
+
+def _escape_dict(obj: dict[str, Any]) -> dict[str, Any]:
+    """Wrap a dict in the escape marker.
+
+    Example:
+        ```python
+        {"key": "value"}  # becomes {"__lc_escaped__": {"key": "value"}}
+        ```
+    """
+    return {_LC_ESCAPED_KEY: obj}
+
+
+def _is_escaped_dict(obj: dict[str, Any]) -> bool:
+    """Check if a dict is an escaped user dict.
+
+    Example:
+        ```python
+        {"__lc_escaped__": {...}}  # is an escaped dict
+        ```
+    """
+    return len(obj) == 1 and _LC_ESCAPED_KEY in obj
+
+
+def _serialize_value(obj: Any) -> Any:
+    """Serialize a value with escaping of user dicts.
+
+    Called recursively on kwarg values to escape any plain dicts that could be confused
+    with LC objects.
+
+    Args:
+        obj: The value to serialize.
+
+    Returns:
+        The serialized value with user dicts escaped as needed.
+    """
+    from langchain_core.load.serializable import (  # noqa: PLC0415
+        Serializable,
+        to_json_not_implemented,
+    )
+
+    if isinstance(obj, Serializable):
+        # This is an LC object - serialize it properly (not escaped)
+        return _serialize_lc_object(obj)
+    if isinstance(obj, dict):
+        if not all(isinstance(k, (str, int, float, bool, type(None))) for k in obj):
+            # if keys are not json serializable
+            return to_json_not_implemented(obj)
+        # Check if dict needs escaping BEFORE recursing into values.
+        # If it needs escaping, wrap it as-is - the contents are user data that
+        # will be returned as-is during deserialization (no instantiation).
+        # This prevents re-escaping of already-escaped nested content.
+        if _needs_escaping(obj):
+            return _escape_dict(obj)
+        # Safe dict (no 'lc' key) - recurse into values
+        return {k: _serialize_value(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_serialize_value(item) for item in obj]
+    if isinstance(obj, (str, int, float, bool, type(None))):
+        return obj
+
+    # Non-JSON-serializable object (datetime, custom objects, etc.)
+    return to_json_not_implemented(obj)
+
+
+def _is_lc_secret(obj: Any) -> bool:
+    """Check if an object is a LangChain secret marker."""
+    expected_num_keys = 3
+    return (
+        isinstance(obj, dict)
+        and obj.get("lc") == 1
+        and obj.get("type") == "secret"
+        and "id" in obj
+        and len(obj) == expected_num_keys
+    )
+
+
+def _serialize_lc_object(obj: Any) -> dict[str, Any]:
+    """Serialize a `Serializable` object with escaping of user data in kwargs.
+
+    Args:
+        obj: The `Serializable` object to serialize.
+
+    Returns:
+        The serialized dict with user data in kwargs escaped as needed.
+
+    Note:
+        Kwargs values are processed with `_serialize_value` to escape user data (like
+        metadata) that contains `'lc'` keys. Secret fields (from `lc_secrets`) are
+        skipped because `to_json()` replaces their values with secret markers.
+    """
+    from langchain_core.load.serializable import Serializable  # noqa: PLC0415
+
+    if not isinstance(obj, Serializable):
+        msg = f"Expected Serializable, got {type(obj)}"
+        raise TypeError(msg)
+
+    serialized: dict[str, Any] = dict(obj.to_json())
+
+    # Process kwargs to escape user data that could be confused with LC objects
+    # Skip secret fields - to_json() already converted them to secret markers
+    if serialized.get("type") == "constructor" and "kwargs" in serialized:
+        serialized["kwargs"] = {
+            k: v if _is_lc_secret(v) else _serialize_value(v)
+            for k, v in serialized["kwargs"].items()
+        }
+
+    return serialized
+
+
+def _unescape_value(obj: Any) -> Any:
+    """Unescape a value, processing escape markers in dict values and lists.
+
+    When an escaped dict is encountered (`{"__lc_escaped__": ...}`), it's
+    unwrapped and the contents are returned AS-IS (no further processing).
+    The contents represent user data that should not be modified.
+
+    For regular dicts and lists, we recurse to find any nested escape markers.
+
+    Args:
+        obj: The value to unescape.
+
+    Returns:
+        The unescaped value.
+    """
+    if isinstance(obj, dict):
+        if _is_escaped_dict(obj):
+            # Unwrap and return the user data as-is (no further unescaping).
+            # The contents are user data that may contain more escape keys,
+            # but those are part of the user's actual data.
+            return obj[_LC_ESCAPED_KEY]
+
+        # Regular dict - recurse into values to find nested escape markers
+        return {k: _unescape_value(v) for k, v in obj.items()}
+    if isinstance(obj, list):
+        return [_unescape_value(item) for item in obj]
+    return obj
@@ -1,10 +1,26 @@
-"""Dump objects to json."""
+"""Serialize LangChain objects to JSON.
+
+Provides `dumps` (to JSON string) and `dumpd` (to dict) for serializing
+`Serializable` objects.
+
+## Escaping
+
+During serialization, plain dicts (user data) that contain an `'lc'` key are escaped
+by wrapping them: `{"__lc_escaped__": {...original...}}`. This prevents injection
+attacks where malicious data could trick the deserializer into instantiating
+arbitrary classes. The escape marker is removed during deserialization.
+
+This is an allowlist approach: only dicts explicitly produced by
+`Serializable.to_json()` are treated as LC objects; everything else is escaped if it
+could be confused with the LC format.
+"""
 
 import json
 from typing import Any
 
 from pydantic import BaseModel
 
+from langchain_core.load._validation import _serialize_value
 from langchain_core.load.serializable import Serializable, to_json_not_implemented
 from langchain_core.messages import AIMessage
 from langchain_core.outputs import ChatGeneration
@@ -25,6 +41,20 @@ def default(obj: Any) -> Any:
 
 
 def _dump_pydantic_models(obj: Any) -> Any:
+    """Convert nested Pydantic models to dicts for JSON serialization.
+
+    Handles the special case where a `ChatGeneration` contains an `AIMessage`
+    with a parsed Pydantic model in `additional_kwargs["parsed"]`. Since
+    Pydantic models aren't directly JSON serializable, this converts them to
+    dicts.
+
+    Args:
+        obj: The object to process.
+
+    Returns:
+        A copy of the object with nested Pydantic models converted to dicts, or
+            the original object unchanged if no conversion was needed.
+    """
     if (
         isinstance(obj, ChatGeneration)
         and isinstance(obj.message, AIMessage)
@@ -40,10 +70,17 @@ def _dump_pydantic_models(obj: Any) -> Any:
 def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
     """Return a JSON string representation of an object.
 
+    Note:
+        Plain dicts containing an `'lc'` key are automatically escaped to prevent
+        confusion with LC serialization format. The escape marker is removed during
+        deserialization.
+
     Args:
         obj: The object to dump.
-        pretty: Whether to pretty print the json. If `True`, the json will be
-            indented with 2 spaces (if no indent is provided as part of `kwargs`).
+        pretty: Whether to pretty print the json.
+
+            If `True`, the json will be indented by either 2 spaces or the amount
+            provided in the `indent` kwarg.
         **kwargs: Additional arguments to pass to `json.dumps`
 
     Returns:
@@ -55,28 +92,29 @@ def dumps(obj: Any, *, pretty: bool = False, **kwargs: Any) -> str:
     if "default" in kwargs:
         msg = "`default` should not be passed to dumps"
         raise ValueError(msg)
-    try:
-        obj = _dump_pydantic_models(obj)
-        if pretty:
-            indent = kwargs.pop("indent", 2)
-            return json.dumps(obj, default=default, indent=indent, **kwargs)
-        return json.dumps(obj, default=default, **kwargs)
-    except TypeError:
-        if pretty:
-            indent = kwargs.pop("indent", 2)
-            return json.dumps(to_json_not_implemented(obj), indent=indent, **kwargs)
-        return json.dumps(to_json_not_implemented(obj), **kwargs)
+
+    obj = _dump_pydantic_models(obj)
+    serialized = _serialize_value(obj)
+
+    if pretty:
+        indent = kwargs.pop("indent", 2)
+        return json.dumps(serialized, indent=indent, **kwargs)
+    return json.dumps(serialized, **kwargs)
 
 
 def dumpd(obj: Any) -> Any:
     """Return a dict representation of an object.
 
+    Note:
+        Plain dicts containing an `'lc'` key are automatically escaped to prevent
+        confusion with LC serialization format. The escape marker is removed during
+        deserialization.
+
     Args:
         obj: The object to dump.
 
     Returns:
         Dictionary that can be serialized to json using `json.dumps`.
     """
-    # Unfortunately this function is not as efficient as it could be because it first
-    # dumps the object to a json string and then loads it back into a dictionary.
-    return json.loads(dumps(obj))
+    obj = _dump_pydantic_models(obj)
+    return _serialize_value(obj)