229 changes: 50 additions & 179 deletions cirq-core/cirq/protocols/json_serialization.py
@@ -28,7 +28,6 @@
Optional,
overload,
Sequence,
Set,
Tuple,
Type,
Union,
@@ -221,10 +220,22 @@ class CirqEncoder(json.JSONEncoder):
See https://github.com/quantumlib/Cirq/issues/2014
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self._memo: dict[Any, dict] = {}

def default(self, o):
# Object with custom method?
if hasattr(o, '_json_dict_'):
return _json_dict_with_cirq_type(o)
json_dict = _json_dict_with_cirq_type(o)
if isinstance(o, SerializableByKey):
Collaborator:

This shouldn't be necessary - SerializableByKey wraps SupportsJSON without modification, so the hasattr check above already confirms that o is SerializableByKey.

This also suggests that SerializableByKey can be entirely replaced by SupportsJSON, although that may be more effort than it's worth. I suspect that in early iterations SerializableByKey had some functionality, but I removed it before merging without removing the class itself.

Contributor Author:

It's true that SerializableByKey doesn't add any methods, but it still serves as a marker for types that want to opt in to this "deduplication" during serialization. One thing being added here is that we now require SerializableByKey classes to be hashable, which is not necessarily the case for all classes that implement SupportsJSON. If we want to change the opt-in mechanism, that would be fine with me; I do think the name SerializableByKey is a bit misleading (and has been since #3673, when the _serialization_key_ method was removed), so at the very least we might consider changing the name.

Collaborator:

Good call. Renaming feels like it would be a headache to me, but given that this changes the serialization format anyway, I guess now would be the time for it.

Happy to review any changes from this here or as a separate PR.

Contributor Author:

Since the SerializableByKey name itself no longer appears in the serialized format, renaming will be a source-only change and would not affect compatibility of deserializing stored data, so that should be low risk. Nevertheless, since naming always invites bikeshedding, I'm inclined to defer that to a future PR, so I'll leave this as-is for now :-)

if ref := self._memo.get(o):
return ref
key = len(self._memo)
ref = {"cirq_type": "REF", "key": key}
self._memo[o] = ref
return {"cirq_type": "VAL", "key": key, "val": json_dict}
return json_dict

# Sympy object? (Must come before general number checks.)
# TODO: More support for sympy
@@ -306,27 +317,46 @@ def default(self, o):
return super().default(o) # pragma: no cover


def _cirq_object_hook(d, resolvers: Sequence[JsonResolver], context_map: Dict[str, Any]):
if 'cirq_type' not in d:
return d
class ObjectHook:
"""Callable to be used as object_hook during deserialization."""

LEGACY_CONTEXT_TYPES = {'_ContextualSerialization', '_SerializedKey', '_SerializedContext'}

def __init__(self, resolvers: Sequence[JsonResolver]) -> None:
self.resolvers = resolvers
self.memo: Dict[int, SerializableByKey] = {}
self.context_map: Dict[int, SerializableByKey] = {}

if d['cirq_type'] == '_SerializedKey':
return _SerializedKey.read_from_context(context_map, **d)
def __call__(self, d):
cirq_type = d.get('cirq_type')
if cirq_type is None:
return d

if d['cirq_type'] == '_SerializedContext':
_SerializedContext.update_context(context_map, **d)
return None
if cirq_type == 'VAL':
obj = d['val']
self.memo[d['key']] = obj
return obj

if d['cirq_type'] == '_ContextualSerialization':
return _ContextualSerialization.deserialize_with_context(**d)
if cirq_type == 'REF':
return self.memo[d['key']]

cls = factory_from_json(d['cirq_type'], resolvers=resolvers)
from_json_dict = getattr(cls, '_from_json_dict_', None)
if from_json_dict is not None:
return from_json_dict(**d)
# Deserialize from legacy "contextual serialization" format
if cirq_type in self.LEGACY_CONTEXT_TYPES:
if cirq_type == '_SerializedKey':
return self.context_map[d['key']]
if cirq_type == '_SerializedContext':
self.context_map[d['key']] = d['obj']
return None
if cirq_type == '_ContextualSerialization':
return d['object_dag'][-1]

del d['cirq_type']
return cls(**d)
cls = factory_from_json(cirq_type, resolvers=self.resolvers)
from_json_dict = getattr(cls, '_from_json_dict_', None)
if from_json_dict is not None:
return from_json_dict(**d)

del d['cirq_type']
return cls(**d)


class SerializableByKey(SupportsJSON):
@@ -338,137 +368,6 @@ class SerializableByKey(SupportsJSON):
"""


class _SerializedKey(SupportsJSON):
"""Internal object for holding a SerializableByKey key.

This is a private type used in contextual serialization. Its deserialization
is context-dependent, and is not expected to match the original; in other
words, `cls._from_json_dict_(obj._json_dict_())` does not return
the original `obj` for this type.
"""

def __init__(self, key: str):
self.key = key

def _json_dict_(self):
return obj_to_dict_helper(self, ['key'])

@classmethod
def _from_json_dict_(cls, **kwargs):
raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.')

@classmethod
def read_from_context(cls, context_map, key, **kwargs):
return context_map[key]


class _SerializedContext(SupportsJSON):
"""Internal object for a single SerializableByKey key-to-object mapping.

This is a private type used in contextual serialization. Its deserialization
is context-dependent, and is not expected to match the original; in other
words, `cls._from_json_dict_(obj._json_dict_())` does not return
the original `obj` for this type.
"""

def __init__(self, obj: SerializableByKey, uid: int):
self.key = uid
self.obj = obj

def _json_dict_(self):
return obj_to_dict_helper(self, ['key', 'obj'])

@classmethod
def _from_json_dict_(cls, **kwargs):
raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.')

@classmethod
def update_context(cls, context_map, key, obj, **kwargs):
context_map.update({key: obj})


class _ContextualSerialization(SupportsJSON):
Collaborator:

[No change needed] From our discussion offline: I think the issue with nested objects I was thinking of was linked to this "context table" implementation I used, which did not allow nested definitions. The new design, which allows a VAL inside of a VAL, should bypass the issue.

"""Internal object for serializing an object with its context.

This is a private type used in contextual serialization. Its deserialization
is context-dependent, and is not expected to match the original; in other
words, `cls._from_json_dict_(obj._json_dict_())` does not return
the original `obj` for this type.
"""

def __init__(self, obj: Any):
# Context information and the wrapped object are stored together in
# `object_dag` to ensure consistent serialization ordering.
self.object_dag = []
context = []
for sbk in get_serializable_by_keys(obj):
if sbk not in context:
context.append(sbk)
new_sc = _SerializedContext(sbk, len(context))
self.object_dag.append(new_sc)
self.object_dag += [obj]

def _json_dict_(self):
return obj_to_dict_helper(self, ['object_dag'])

@classmethod
def _from_json_dict_(cls, **kwargs):
raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.')

@classmethod
def deserialize_with_context(cls, object_dag, **kwargs):
# The last element of object_dag is the object to be deserialized.
return object_dag[-1]


def has_serializable_by_keys(obj: Any) -> bool:
"""Returns true if obj contains one or more SerializableByKey objects."""
if isinstance(obj, SerializableByKey):
return True
json_dict = getattr(obj, '_json_dict_', lambda: None)()
if isinstance(json_dict, Dict):
return any(has_serializable_by_keys(v) for v in json_dict.values())

# Handle primitive container types.
if isinstance(obj, Dict):
return any(has_serializable_by_keys(elem) for pair in obj.items() for elem in pair)

if hasattr(obj, '__iter__') and not isinstance(obj, str):
# Return False on TypeError because some numpy values
# (like np.array(1)) have iterable methods
# yet return a TypeError when there is an attempt to iterate over them
try:
return any(has_serializable_by_keys(elem) for elem in obj)
except TypeError:
return False
return False


def get_serializable_by_keys(obj: Any) -> List[SerializableByKey]:
"""Returns all SerializableByKeys contained by obj.

Objects are ordered such that nested objects appear before the object they
are nested inside. This is required to ensure SerializableByKeys are only
fully defined once in serialization.
"""
result = []
if isinstance(obj, SerializableByKey):
result.append(obj)
json_dict = getattr(obj, '_json_dict_', lambda: None)()
if isinstance(json_dict, Dict):
for v in json_dict.values():
result = get_serializable_by_keys(v) + result
if result:
return result

# Handle primitive container types.
if isinstance(obj, Dict):
return [sbk for pair in obj.items() for sbk in get_serializable_by_keys(pair)]
if hasattr(obj, '__iter__') and not isinstance(obj, str):
return [sbk for v in obj for sbk in get_serializable_by_keys(v)]
return []


def json_namespace(type_obj: Type) -> str:
"""Returns a namespace for JSON serialization of `type_obj`.

@@ -610,37 +509,12 @@ def to_json(
party classes, prefer adding the `_json_dict_` magic method
to your classes rather than overriding this default.
"""
if has_serializable_by_keys(obj):
obj = _ContextualSerialization(obj)

class ContextualEncoder(cls): # type: ignore
"""An encoder with a context map for concise serialization."""

# These lists populate gradually during serialization. An object
# with components defined in 'context' will represent those
# components using their keys instead of inline definition.
seen: Set[str] = set()

def default(self, o):
if not isinstance(o, SerializableByKey):
return super().default(o)
for candidate in obj.object_dag[:-1]:
if candidate.obj == o:
if not candidate.key in ContextualEncoder.seen:
ContextualEncoder.seen.add(candidate.key)
return _json_dict_with_cirq_type(candidate.obj)
else:
return _json_dict_with_cirq_type(_SerializedKey(candidate.key))
raise ValueError("Object mutated during serialization.") # pragma: no cover

cls = ContextualEncoder

if file_or_fn is None:
return json.dumps(obj, indent=indent, separators=separators, cls=cls)

if isinstance(file_or_fn, (str, pathlib.Path)):
with open(file_or_fn, 'w') as actually_a_file:
json.dump(obj, actually_a_file, indent=indent, cls=cls)
json.dump(obj, actually_a_file, indent=indent, separators=separators, cls=cls)
return None

json.dump(obj, file_or_fn, indent=indent, separators=separators, cls=cls)
@@ -682,10 +556,7 @@ def read_json(
if resolvers is None:
resolvers = DEFAULT_RESOLVERS

context_map: Dict[str, 'SerializableByKey'] = {}

def obj_hook(x):
return _cirq_object_hook(x, resolvers, context_map)
obj_hook = ObjectHook(resolvers)

if json_text is not None:
return json.loads(json_text, object_hook=obj_hook)
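To make the new format concrete, here is a minimal roundtrip sketch. It reuses SBKImpl and the custom-resolver pattern from the test file below; the inline comments paraphrase the expected wire shape rather than quoting actual output.

import cirq

# Assumes SBKImpl (defined in the test file below) is in scope.
def custom_resolver(name):
    if name == 'SBKImpl':
        return SBKImpl

shared = SBKImpl('shared')
outer = SBKImpl('outer', data_list=[shared, shared])
text = cirq.to_json(outer)

# `outer` is emitted as {"cirq_type": "VAL", "key": 0, "val": {...}}. Inside
# that "val", the first occurrence of `shared` becomes a VAL with key 1 (a VAL
# nested in a VAL), and the repeat collapses to {"cirq_type": "REF", "key": 1}.
assert text.count('"cirq_type": "VAL"') == 2
assert text.count('"cirq_type": "REF"') == 1

# ObjectHook records each VAL in its memo and resolves each REF from it,
# so shared references come back as the same object.
restored = cirq.read_json(json_text=text, resolvers=[custom_resolver, *cirq.DEFAULT_RESOLVERS])
assert restored.data_list[0] is restored.data_list[1]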
50 changes: 11 additions & 39 deletions cirq-core/cirq/protocols/json_serialization_test.py
@@ -373,6 +373,11 @@ def __eq__(self, other):
and self.data_dict == other.data_dict
)

def __hash__(self):
return hash(
(self.name, tuple(self.data_list), self.data_tuple, frozenset(self.data_dict.items()))
)

def _json_dict_(self):
return {
"name": self.name,
@@ -386,12 +391,12 @@ def _from_json_dict_(cls, name, data_list, data_tuple, data_dict, **kwargs):
return cls(name, data_list, tuple(data_tuple), data_dict)


def test_context_serialization():
def test_serializable_by_key():
def custom_resolver(name):
if name == 'SBKImpl':
return SBKImpl

test_resolvers = [custom_resolver] + cirq.DEFAULT_RESOLVERS
test_resolvers = [custom_resolver, *cirq.DEFAULT_RESOLVERS]

sbki_empty = SBKImpl('sbki_empty')
assert_json_roundtrip_works(sbki_empty, resolvers=test_resolvers)
@@ -406,55 +411,22 @@ def custom_resolver(name):
assert_json_roundtrip_works(sbki_dict, resolvers=test_resolvers)

sbki_json = str(cirq.to_json(sbki_dict))
# There should be exactly one context item for each previous SBKImpl.
assert sbki_json.count('"cirq_type": "_SerializedContext"') == 4
# There should be exactly two key items for each of sbki_(empty|list|tuple),
# plus one for the top-level sbki_dict.
assert sbki_json.count('"cirq_type": "_SerializedKey"') == 7
# The final object should be a _SerializedKey for sbki_dict.
final_obj_idx = sbki_json.rfind('{')
final_obj = sbki_json[final_obj_idx : sbki_json.find('}', final_obj_idx) + 1]
assert (
final_obj
== """{
"cirq_type": "_SerializedKey",
"key": 4
}"""
)
# There are 4 SBKImpl instances, one each for empty, list, tuple, dict.
assert sbki_json.count('"cirq_type": "VAL"') == 4
# There are 3 SBKImpl refs, one each for empty, list, and tuple.
assert sbki_json.count('"cirq_type": "REF"') == 3

list_sbki = [sbki_dict]
assert_json_roundtrip_works(list_sbki, resolvers=test_resolvers)

dict_sbki = {'a': sbki_dict}
assert_json_roundtrip_works(dict_sbki, resolvers=test_resolvers)

assert sbki_list != json_serialization._SerializedKey(sbki_list)

# Serialization keys have unique suffixes.
sbki_other_list = SBKImpl('sbki_list', data_list=[sbki_list])
assert_json_roundtrip_works(sbki_other_list, resolvers=test_resolvers)


def test_internal_serializer_types():
sbki = SBKImpl('test_key')
key = 1
test_key = json_serialization._SerializedKey(key)
test_context = json_serialization._SerializedContext(sbki, 1)
test_serialization = json_serialization._ContextualSerialization(sbki)

key_json = test_key._json_dict_()
with pytest.raises(TypeError, match='_from_json_dict_'):
_ = json_serialization._SerializedKey._from_json_dict_(**key_json)

context_json = test_context._json_dict_()
with pytest.raises(TypeError, match='_from_json_dict_'):
_ = json_serialization._SerializedContext._from_json_dict_(**context_json)

serialization_json = test_serialization._json_dict_()
with pytest.raises(TypeError, match='_from_json_dict_'):
_ = json_serialization._ContextualSerialization._from_json_dict_(**serialization_json)


# during test setup deprecated submodules are inspected and trigger the
# deprecation error in testing. It is cleaner to just turn it off than to assert
# deprecation for each submodule.
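For completeness, a sketch of the legacy path that ObjectHook retains for reading old files. The payload below is hypothetical (real legacy files wrapped SerializableByKey objects such as circuits), but the field names match the removed _SerializedContext/_SerializedKey/_ContextualSerialization classes, and the mechanics are the same for any deserializable type.

import cirq

legacy_json = """{
  "cirq_type": "_ContextualSerialization",
  "object_dag": [
    {
      "cirq_type": "_SerializedContext",
      "key": 1,
      "obj": {"cirq_type": "LineQubit", "x": 0}
    },
    {"cirq_type": "_SerializedKey", "key": 1}
  ]
}"""

# object_hook runs bottom-up: the _SerializedContext entry stores the
# deserialized qubit in context_map, the _SerializedKey entry reads it back,
# and _ContextualSerialization returns the last element of object_dag.
assert cirq.read_json(json_text=legacy_json) == cirq.LineQubit(0)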