229 changes: 50 additions & 179 deletions cirq-core/cirq/protocols/json_serialization.py
@@ -28,7 +28,6 @@
Optional,
overload,
Sequence,
Set,
Tuple,
Type,
Union,
@@ -221,10 +220,22 @@ class CirqEncoder(json.JSONEncoder):
See https://github.com/quantumlib/Cirq/issues/2014
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self._memo: dict[Any, dict] = {}

def default(self, o):
# Object with custom method?
if hasattr(o, '_json_dict_'):
return _json_dict_with_cirq_type(o)
json_dict = _json_dict_with_cirq_type(o)
if isinstance(o, SerializableByKey):
Collaborator:

This shouldn't be necessary - SerializableByKey wraps SupportsJSON without modification, so the hasattr check above already confirms that o is SerializableByKey.

This also suggests that SerializableByKey can be entirely replaced by SupportsJSON, although that may be more effort than it's worth. I suspect that in early iterations SerializableByKey had some functionality, but I removed it before merging without removing the class itself.

Contributor Author:

It's true that SerializableByKey doesn't add any methods, but it still serves as a marker for types that want to opt in to this "deduplication" during serialization. One thing being added here is that we now require SerializableByKey classes to be hashable, which is not necessarily the case for all classes that implement SupportsJSON. If we want to change the opt-in mechanism, that would be fine with me; I do think the name SerializableByKey is a bit misleading (and has been since #3673, when the _serialization_key_ method was removed), so at the very least we might consider changing the name.

Collaborator:

Good call. Renaming feels like it would be a headache to me, but given that this changes the serialization format anyway, I guess now would be the time for it.

Happy to review any changes from this here or as a separate PR.

Contributor Author:

Since the SerializableByKey name itself no longer appears in the serialized format, renaming will be a source-only change and would not affect compatibility of deserializing stored data, so that should be low risk. Nevertheless, since naming always invites bikeshedding, I'm inclined to defer that to a future PR, so I'll leave this as-is for now :-)

if ref := self._memo.get(o):
return ref
key = len(self._memo)
ref = {"cirq_type": "REF", "key": key}
self._memo[o] = ref
return {"cirq_type": "VAL", "key": key, "val": json_dict}
return json_dict

# Sympy object? (Must come before general number checks.)
# TODO: More support for sympy
@@ -306,27 +317,46 @@ def default(self, o):
return super().default(o) # pragma: no cover


def _cirq_object_hook(d, resolvers: Sequence[JsonResolver], context_map: Dict[str, Any]):
if 'cirq_type' not in d:
return d
class ObjectHook:
"""Callable to be used as object_hook during deserialization."""

LEGACY_CONTEXT_TYPES = {'_ContextualSerialization', '_SerializedKey', '_SerializedContext'}

def __init__(self, resolvers: Sequence[JsonResolver]) -> None:
self.resolvers = resolvers
self.memo: Dict[int, SerializableByKey] = {}
self.context_map: Dict[int, SerializableByKey] = {}

if d['cirq_type'] == '_SerializedKey':
return _SerializedKey.read_from_context(context_map, **d)
def __call__(self, d):
cirq_type = d.get('cirq_type')
if cirq_type is None:
return d

if d['cirq_type'] == '_SerializedContext':
_SerializedContext.update_context(context_map, **d)
return None
if cirq_type == 'VAL':
obj = d['val']
self.memo[d['key']] = obj
return obj

if d['cirq_type'] == '_ContextualSerialization':
return _ContextualSerialization.deserialize_with_context(**d)
if cirq_type == 'REF':
return self.memo[d['key']]

cls = factory_from_json(d['cirq_type'], resolvers=resolvers)
from_json_dict = getattr(cls, '_from_json_dict_', None)
if from_json_dict is not None:
return from_json_dict(**d)
# Deserialize from legacy "contextual serialization" format
if cirq_type in self.LEGACY_CONTEXT_TYPES:
if cirq_type == '_SerializedKey':
return self.context_map[d['key']]
if cirq_type == '_SerializedContext':
self.context_map[d['key']] = d['obj']
return None
if cirq_type == '_ContextualSerialization':
return d['object_dag'][-1]

del d['cirq_type']
return cls(**d)
cls = factory_from_json(cirq_type, resolvers=self.resolvers)
from_json_dict = getattr(cls, '_from_json_dict_', None)
if from_json_dict is not None:
return from_json_dict(**d)

del d['cirq_type']
return cls(**d)


class SerializableByKey(SupportsJSON):
@@ -338,137 +368,6 @@ class SerializableByKey(SupportsJSON):
"""


class _SerializedKey(SupportsJSON):
"""Internal object for holding a SerializableByKey key.

This is a private type used in contextual serialization. Its deserialization
is context-dependent, and is not expected to match the original; in other
words, `cls._from_json_dict_(obj._json_dict_())` does not return
the original `obj` for this type.
"""

def __init__(self, key: str):
self.key = key

def _json_dict_(self):
return obj_to_dict_helper(self, ['key'])

@classmethod
def _from_json_dict_(cls, **kwargs):
raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.')

@classmethod
def read_from_context(cls, context_map, key, **kwargs):
return context_map[key]


class _SerializedContext(SupportsJSON):
"""Internal object for a single SerializableByKey key-to-object mapping.

This is a private type used in contextual serialization. Its deserialization
is context-dependent, and is not expected to match the original; in other
words, `cls._from_json_dict_(obj._json_dict_())` does not return
the original `obj` for this type.
"""

def __init__(self, obj: SerializableByKey, uid: int):
self.key = uid
self.obj = obj

def _json_dict_(self):
return obj_to_dict_helper(self, ['key', 'obj'])

@classmethod
def _from_json_dict_(cls, **kwargs):
raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.')

@classmethod
def update_context(cls, context_map, key, obj, **kwargs):
context_map.update({key: obj})


class _ContextualSerialization(SupportsJSON):
Collaborator:

[No change needed] From our discussion offline: I think the issue with nested objects I was thinking of was linked to this "context table" implementation I used, which did not allow nested definitions. The new design, which allows a VAL inside of a VAL, should bypass the issue.

"""Internal object for serializing an object with its context.

This is a private type used in contextual serialization. Its deserialization
is context-dependent, and is not expected to match the original; in other
words, `cls._from_json_dict_(obj._json_dict_())` does not return
the original `obj` for this type.
"""

def __init__(self, obj: Any):
# Context information and the wrapped object are stored together in
# `object_dag` to ensure consistent serialization ordering.
self.object_dag = []
context = []
for sbk in get_serializable_by_keys(obj):
if sbk not in context:
context.append(sbk)
new_sc = _SerializedContext(sbk, len(context))
self.object_dag.append(new_sc)
self.object_dag += [obj]

def _json_dict_(self):
return obj_to_dict_helper(self, ['object_dag'])

@classmethod
def _from_json_dict_(cls, **kwargs):
raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.')

@classmethod
def deserialize_with_context(cls, object_dag, **kwargs):
# The last element of object_dag is the object to be deserialized.
return object_dag[-1]


def has_serializable_by_keys(obj: Any) -> bool:
"""Returns true if obj contains one or more SerializableByKey objects."""
if isinstance(obj, SerializableByKey):
return True
json_dict = getattr(obj, '_json_dict_', lambda: None)()
if isinstance(json_dict, Dict):
return any(has_serializable_by_keys(v) for v in json_dict.values())

# Handle primitive container types.
if isinstance(obj, Dict):
return any(has_serializable_by_keys(elem) for pair in obj.items() for elem in pair)

if hasattr(obj, '__iter__') and not isinstance(obj, str):
# Return False on TypeError because some numpy values
# (like np.array(1)) have iterable methods
# yet return a TypeError when there is an attempt to iterate over them
try:
return any(has_serializable_by_keys(elem) for elem in obj)
except TypeError:
return False
return False


def get_serializable_by_keys(obj: Any) -> List[SerializableByKey]:
"""Returns all SerializableByKeys contained by obj.

Objects are ordered such that nested objects appear before the object they
are nested inside. This is required to ensure SerializableByKeys are only
fully defined once in serialization.
"""
result = []
if isinstance(obj, SerializableByKey):
result.append(obj)
json_dict = getattr(obj, '_json_dict_', lambda: None)()
if isinstance(json_dict, Dict):
for v in json_dict.values():
result = get_serializable_by_keys(v) + result
if result:
return result

# Handle primitive container types.
if isinstance(obj, Dict):
return [sbk for pair in obj.items() for sbk in get_serializable_by_keys(pair)]
if hasattr(obj, '__iter__') and not isinstance(obj, str):
return [sbk for v in obj for sbk in get_serializable_by_keys(v)]
return []


def json_namespace(type_obj: Type) -> str:
"""Returns a namespace for JSON serialization of `type_obj`.

@@ -610,37 +509,12 @@ def to_json(
party classes, prefer adding the `_json_dict_` magic method
to your classes rather than overriding this default.
"""
if has_serializable_by_keys(obj):
obj = _ContextualSerialization(obj)

class ContextualEncoder(cls): # type: ignore
"""An encoder with a context map for concise serialization."""

# These lists populate gradually during serialization. An object
# with components defined in 'context' will represent those
# components using their keys instead of inline definition.
seen: Set[str] = set()

def default(self, o):
if not isinstance(o, SerializableByKey):
return super().default(o)
for candidate in obj.object_dag[:-1]:
if candidate.obj == o:
if not candidate.key in ContextualEncoder.seen:
ContextualEncoder.seen.add(candidate.key)
return _json_dict_with_cirq_type(candidate.obj)
else:
return _json_dict_with_cirq_type(_SerializedKey(candidate.key))
raise ValueError("Object mutated during serialization.") # pragma: no cover

cls = ContextualEncoder

if file_or_fn is None:
return json.dumps(obj, indent=indent, separators=separators, cls=cls)

if isinstance(file_or_fn, (str, pathlib.Path)):
with open(file_or_fn, 'w') as actually_a_file:
json.dump(obj, actually_a_file, indent=indent, cls=cls)
json.dump(obj, actually_a_file, indent=indent, separators=separators, cls=cls)
return None

json.dump(obj, file_or_fn, indent=indent, separators=separators, cls=cls)
@@ -682,10 +556,7 @@ def read_json(
if resolvers is None:
resolvers = DEFAULT_RESOLVERS

context_map: Dict[str, 'SerializableByKey'] = {}

def obj_hook(x):
return _cirq_object_hook(x, resolvers, context_map)
obj_hook = ObjectHook(resolvers)

if json_text is not None:
return json.loads(json_text, object_hook=obj_hook)
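To make the new format concrete, here is a minimal roundtrip sketch. It reuses SBKImpl and the custom-resolver pattern from the test file below; the inline comments paraphrase the expected wire shape rather than quoting actual output.

import cirq

# Assumes SBKImpl (defined in the test file below) is in scope.
def custom_resolver(name):
    if name == 'SBKImpl':
        return SBKImpl

shared = SBKImpl('shared')
outer = SBKImpl('outer', data_list=[shared, shared])
text = cirq.to_json(outer)

# `outer` is emitted as {"cirq_type": "VAL", "key": 0, "val": {...}}. Inside
# that "val", the first occurrence of `shared` becomes a VAL with key 1 (a VAL
# nested in a VAL), and the repeat collapses to {"cirq_type": "REF", "key": 1}.
assert text.count('"cirq_type": "VAL"') == 2
assert text.count('"cirq_type": "REF"') == 1

# ObjectHook records each VAL in its memo and resolves each REF from it,
# so shared references come back as the same object.
restored = cirq.read_json(json_text=text, resolvers=[custom_resolver, *cirq.DEFAULT_RESOLVERS])
assert restored.data_list[0] is restored.data_list[1]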
50 changes: 11 additions & 39 deletions cirq-core/cirq/protocols/json_serialization_test.py
@@ -373,6 +373,11 @@ def __eq__(self, other):
and self.data_dict == other.data_dict
)

def __hash__(self):
return hash(
(self.name, tuple(self.data_list), self.data_tuple, frozenset(self.data_dict.items()))
)

def _json_dict_(self):
return {
"name": self.name,
@@ -386,12 +391,12 @@ def _from_json_dict_(cls, name, data_list, data_tuple, data_dict, **kwargs):
return cls(name, data_list, tuple(data_tuple), data_dict)


def test_context_serialization():
def test_serializable_by_key():
def custom_resolver(name):
if name == 'SBKImpl':
return SBKImpl

test_resolvers = [custom_resolver] + cirq.DEFAULT_RESOLVERS
test_resolvers = [custom_resolver, *cirq.DEFAULT_RESOLVERS]

sbki_empty = SBKImpl('sbki_empty')
assert_json_roundtrip_works(sbki_empty, resolvers=test_resolvers)
@@ -406,55 +411,22 @@ def custom_resolver(name):
assert_json_roundtrip_works(sbki_dict, resolvers=test_resolvers)

sbki_json = str(cirq.to_json(sbki_dict))
# There should be exactly one context item for each previous SBKImpl.
assert sbki_json.count('"cirq_type": "_SerializedContext"') == 4
# There should be exactly two key items for each of sbki_(empty|list|tuple),
# plus one for the top-level sbki_dict.
assert sbki_json.count('"cirq_type": "_SerializedKey"') == 7
# The final object should be a _SerializedKey for sbki_dict.
final_obj_idx = sbki_json.rfind('{')
final_obj = sbki_json[final_obj_idx : sbki_json.find('}', final_obj_idx) + 1]
assert (
final_obj
== """{
"cirq_type": "_SerializedKey",
"key": 4
}"""
)
# There are 4 SBKImpl instances, one each for empty, list, tuple, dict.
assert sbki_json.count('"cirq_type": "VAL"') == 4
# There are 3 SBKImpl refs, one each for empty, list, and tuple.
assert sbki_json.count('"cirq_type": "REF"') == 3

list_sbki = [sbki_dict]
assert_json_roundtrip_works(list_sbki, resolvers=test_resolvers)

dict_sbki = {'a': sbki_dict}
assert_json_roundtrip_works(dict_sbki, resolvers=test_resolvers)

assert sbki_list != json_serialization._SerializedKey(sbki_list)

# Serialization keys have unique suffixes.
sbki_other_list = SBKImpl('sbki_list', data_list=[sbki_list])
assert_json_roundtrip_works(sbki_other_list, resolvers=test_resolvers)


def test_internal_serializer_types():
sbki = SBKImpl('test_key')
key = 1
test_key = json_serialization._SerializedKey(key)
test_context = json_serialization._SerializedContext(sbki, 1)
test_serialization = json_serialization._ContextualSerialization(sbki)

key_json = test_key._json_dict_()
with pytest.raises(TypeError, match='_from_json_dict_'):
_ = json_serialization._SerializedKey._from_json_dict_(**key_json)

context_json = test_context._json_dict_()
with pytest.raises(TypeError, match='_from_json_dict_'):
_ = json_serialization._SerializedContext._from_json_dict_(**context_json)

serialization_json = test_serialization._json_dict_()
with pytest.raises(TypeError, match='_from_json_dict_'):
_ = json_serialization._ContextualSerialization._from_json_dict_(**serialization_json)


# during test setup deprecated submodules are inspected and trigger the
# deprecation error in testing. It is cleaner to just turn it off than to assert
# deprecation for each submodule.
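For completeness, a sketch of the legacy path that ObjectHook retains for reading old files. The payload below is hypothetical (real legacy files wrapped SerializableByKey objects such as circuits), but the field names match the removed _SerializedContext/_SerializedKey/_ContextualSerialization classes, and the mechanics are the same for any deserializable type.

import cirq

legacy_json = """{
  "cirq_type": "_ContextualSerialization",
  "object_dag": [
    {
      "cirq_type": "_SerializedContext",
      "key": 1,
      "obj": {"cirq_type": "LineQubit", "x": 0}
    },
    {"cirq_type": "_SerializedKey", "key": 1}
  ]
}"""

# object_hook runs bottom-up: the _SerializedContext entry stores the
# deserialized qubit in context_map, the _SerializedKey entry reads it back,
# and _ContextualSerialization returns the last element of object_dag.
assert cirq.read_json(json_text=legacy_json) == cirq.LineQubit(0)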