-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Rework SerializableByKey handling to improve performance #6469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,7 +28,6 @@ | |
| Optional, | ||
| overload, | ||
| Sequence, | ||
| Set, | ||
| Tuple, | ||
| Type, | ||
| Union, | ||
|
|
@@ -221,10 +220,22 @@ class CirqEncoder(json.JSONEncoder): | |
| See https://github.com/quantumlib/Cirq/issues/2014 | ||
| """ | ||
|
|
||
| def __init__(self, *args, **kwargs) -> None: | ||
| super().__init__(*args, **kwargs) | ||
| self._memo: dict[Any, dict] = {} | ||
|
|
||
| def default(self, o): | ||
| # Object with custom method? | ||
| if hasattr(o, '_json_dict_'): | ||
| return _json_dict_with_cirq_type(o) | ||
| json_dict = _json_dict_with_cirq_type(o) | ||
| if isinstance(o, SerializableByKey): | ||
| if ref := self._memo.get(o): | ||
| return ref | ||
| key = len(self._memo) | ||
| ref = {"cirq_type": "REF", "key": key} | ||
| self._memo[o] = ref | ||
| return {"cirq_type": "VAL", "key": key, "val": json_dict} | ||
| return json_dict | ||
|
|
||
| # Sympy object? (Must come before general number checks.) | ||
| # TODO: More support for sympy | ||
|
|
@@ -306,27 +317,46 @@ def default(self, o): | |
| return super().default(o) # pragma: no cover | ||
|
|
||
|
|
||
| def _cirq_object_hook(d, resolvers: Sequence[JsonResolver], context_map: Dict[str, Any]): | ||
| if 'cirq_type' not in d: | ||
| return d | ||
| class ObjectHook: | ||
| """Callable to be used as object_hook during deserialization.""" | ||
|
|
||
| LEGACY_CONTEXT_TYPES = {'_ContextualSerialization', '_SerializedKey', '_SerializedContext'} | ||
|
|
||
| def __init__(self, resolvers: Sequence[JsonResolver]) -> None: | ||
| self.resolvers = resolvers | ||
| self.memo: Dict[int, SerializableByKey] = {} | ||
| self.context_map: Dict[int, SerializableByKey] = {} | ||
|
|
||
| if d['cirq_type'] == '_SerializedKey': | ||
| return _SerializedKey.read_from_context(context_map, **d) | ||
| def __call__(self, d): | ||
| cirq_type = d.get('cirq_type') | ||
| if cirq_type is None: | ||
| return d | ||
|
|
||
| if d['cirq_type'] == '_SerializedContext': | ||
| _SerializedContext.update_context(context_map, **d) | ||
| return None | ||
| if cirq_type == 'VAL': | ||
| obj = d['val'] | ||
| self.memo[d['key']] = obj | ||
| return obj | ||
|
|
||
| if d['cirq_type'] == '_ContextualSerialization': | ||
| return _ContextualSerialization.deserialize_with_context(**d) | ||
| if cirq_type == 'REF': | ||
| return self.memo[d['key']] | ||
|
|
||
| cls = factory_from_json(d['cirq_type'], resolvers=resolvers) | ||
| from_json_dict = getattr(cls, '_from_json_dict_', None) | ||
| if from_json_dict is not None: | ||
| return from_json_dict(**d) | ||
| # Deserialize from legacy "contextual serialization" format | ||
| if cirq_type in self.LEGACY_CONTEXT_TYPES: | ||
| if cirq_type == '_SerializedKey': | ||
| return self.context_map[d['key']] | ||
| if cirq_type == '_SerializedContext': | ||
| self.context_map[d['key']] = d['obj'] | ||
| return None | ||
| if cirq_type == '_ContextualSerialization': | ||
| return d['object_dag'][-1] | ||
|
|
||
| del d['cirq_type'] | ||
| return cls(**d) | ||
| cls = factory_from_json(cirq_type, resolvers=self.resolvers) | ||
| from_json_dict = getattr(cls, '_from_json_dict_', None) | ||
| if from_json_dict is not None: | ||
| return from_json_dict(**d) | ||
|
|
||
| del d['cirq_type'] | ||
| return cls(**d) | ||
|
|
||
|
|
||
| class SerializableByKey(SupportsJSON): | ||
|
|
@@ -338,137 +368,6 @@ class SerializableByKey(SupportsJSON): | |
| """ | ||
|
|
||
|
|
||
| class _SerializedKey(SupportsJSON): | ||
| """Internal object for holding a SerializableByKey key. | ||
|
|
||
| This is a private type used in contextual serialization. Its deserialization | ||
| is context-dependent, and is not expected to match the original; in other | ||
| words, `cls._from_json_dict_(obj._json_dict_())` does not return | ||
| the original `obj` for this type. | ||
| """ | ||
|
|
||
| def __init__(self, key: str): | ||
| self.key = key | ||
|
|
||
| def _json_dict_(self): | ||
| return obj_to_dict_helper(self, ['key']) | ||
|
|
||
| @classmethod | ||
| def _from_json_dict_(cls, **kwargs): | ||
| raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.') | ||
|
|
||
| @classmethod | ||
| def read_from_context(cls, context_map, key, **kwargs): | ||
| return context_map[key] | ||
|
|
||
|
|
||
| class _SerializedContext(SupportsJSON): | ||
| """Internal object for a single SerializableByKey key-to-object mapping. | ||
|
|
||
| This is a private type used in contextual serialization. Its deserialization | ||
| is context-dependent, and is not expected to match the original; in other | ||
| words, `cls._from_json_dict_(obj._json_dict_())` does not return | ||
| the original `obj` for this type. | ||
| """ | ||
|
|
||
| def __init__(self, obj: SerializableByKey, uid: int): | ||
| self.key = uid | ||
| self.obj = obj | ||
|
|
||
| def _json_dict_(self): | ||
| return obj_to_dict_helper(self, ['key', 'obj']) | ||
|
|
||
| @classmethod | ||
| def _from_json_dict_(cls, **kwargs): | ||
| raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.') | ||
|
|
||
| @classmethod | ||
| def update_context(cls, context_map, key, obj, **kwargs): | ||
| context_map.update({key: obj}) | ||
|
|
||
|
|
||
| class _ContextualSerialization(SupportsJSON): | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [No change needed] From our discussion offline: I think the issue with nested objects I was thinking of was linked to this "context table" implementation I used, which did not allow nested definitions. The new design, which allows a |
||
| """Internal object for serializing an object with its context. | ||
|
|
||
| This is a private type used in contextual serialization. Its deserialization | ||
| is context-dependent, and is not expected to match the original; in other | ||
| words, `cls._from_json_dict_(obj._json_dict_())` does not return | ||
| the original `obj` for this type. | ||
| """ | ||
|
|
||
| def __init__(self, obj: Any): | ||
| # Context information and the wrapped object are stored together in | ||
| # `object_dag` to ensure consistent serialization ordering. | ||
| self.object_dag = [] | ||
| context = [] | ||
| for sbk in get_serializable_by_keys(obj): | ||
| if sbk not in context: | ||
| context.append(sbk) | ||
| new_sc = _SerializedContext(sbk, len(context)) | ||
| self.object_dag.append(new_sc) | ||
| self.object_dag += [obj] | ||
|
|
||
| def _json_dict_(self): | ||
| return obj_to_dict_helper(self, ['object_dag']) | ||
|
|
||
| @classmethod | ||
| def _from_json_dict_(cls, **kwargs): | ||
| raise TypeError(f'Internal error: {cls} should never deserialize with _from_json_dict_.') | ||
|
|
||
| @classmethod | ||
| def deserialize_with_context(cls, object_dag, **kwargs): | ||
| # The last element of object_dag is the object to be deserialized. | ||
| return object_dag[-1] | ||
|
|
||
|
|
||
| def has_serializable_by_keys(obj: Any) -> bool: | ||
| """Returns true if obj contains one or more SerializableByKey objects.""" | ||
| if isinstance(obj, SerializableByKey): | ||
| return True | ||
| json_dict = getattr(obj, '_json_dict_', lambda: None)() | ||
| if isinstance(json_dict, Dict): | ||
| return any(has_serializable_by_keys(v) for v in json_dict.values()) | ||
|
|
||
| # Handle primitive container types. | ||
| if isinstance(obj, Dict): | ||
| return any(has_serializable_by_keys(elem) for pair in obj.items() for elem in pair) | ||
|
|
||
| if hasattr(obj, '__iter__') and not isinstance(obj, str): | ||
| # Return False on TypeError because some numpy values | ||
| # (like np.array(1)) have iterable methods | ||
| # yet return a TypeError when there is an attempt to iterate over them | ||
| try: | ||
| return any(has_serializable_by_keys(elem) for elem in obj) | ||
| except TypeError: | ||
| return False | ||
| return False | ||
|
|
||
|
|
||
| def get_serializable_by_keys(obj: Any) -> List[SerializableByKey]: | ||
| """Returns all SerializableByKeys contained by obj. | ||
|
|
||
| Objects are ordered such that nested objects appear before the object they | ||
| are nested inside. This is required to ensure SerializableByKeys are only | ||
| fully defined once in serialization. | ||
| """ | ||
| result = [] | ||
| if isinstance(obj, SerializableByKey): | ||
| result.append(obj) | ||
| json_dict = getattr(obj, '_json_dict_', lambda: None)() | ||
| if isinstance(json_dict, Dict): | ||
| for v in json_dict.values(): | ||
| result = get_serializable_by_keys(v) + result | ||
| if result: | ||
| return result | ||
|
|
||
| # Handle primitive container types. | ||
| if isinstance(obj, Dict): | ||
| return [sbk for pair in obj.items() for sbk in get_serializable_by_keys(pair)] | ||
| if hasattr(obj, '__iter__') and not isinstance(obj, str): | ||
| return [sbk for v in obj for sbk in get_serializable_by_keys(v)] | ||
| return [] | ||
|
|
||
|
|
||
| def json_namespace(type_obj: Type) -> str: | ||
| """Returns a namespace for JSON serialization of `type_obj`. | ||
|
|
||
|
|
@@ -610,37 +509,12 @@ def to_json( | |
| party classes, prefer adding the `_json_dict_` magic method | ||
| to your classes rather than overriding this default. | ||
| """ | ||
| if has_serializable_by_keys(obj): | ||
| obj = _ContextualSerialization(obj) | ||
|
|
||
| class ContextualEncoder(cls): # type: ignore | ||
| """An encoder with a context map for concise serialization.""" | ||
|
|
||
| # These lists populate gradually during serialization. An object | ||
| # with components defined in 'context' will represent those | ||
| # components using their keys instead of inline definition. | ||
| seen: Set[str] = set() | ||
|
|
||
| def default(self, o): | ||
| if not isinstance(o, SerializableByKey): | ||
| return super().default(o) | ||
| for candidate in obj.object_dag[:-1]: | ||
| if candidate.obj == o: | ||
| if not candidate.key in ContextualEncoder.seen: | ||
| ContextualEncoder.seen.add(candidate.key) | ||
| return _json_dict_with_cirq_type(candidate.obj) | ||
| else: | ||
| return _json_dict_with_cirq_type(_SerializedKey(candidate.key)) | ||
| raise ValueError("Object mutated during serialization.") # pragma: no cover | ||
|
|
||
| cls = ContextualEncoder | ||
|
|
||
| if file_or_fn is None: | ||
| return json.dumps(obj, indent=indent, separators=separators, cls=cls) | ||
|
|
||
| if isinstance(file_or_fn, (str, pathlib.Path)): | ||
| with open(file_or_fn, 'w') as actually_a_file: | ||
| json.dump(obj, actually_a_file, indent=indent, cls=cls) | ||
| json.dump(obj, actually_a_file, indent=indent, separators=separators, cls=cls) | ||
| return None | ||
|
|
||
| json.dump(obj, file_or_fn, indent=indent, separators=separators, cls=cls) | ||
|
|
@@ -682,10 +556,7 @@ def read_json( | |
| if resolvers is None: | ||
| resolvers = DEFAULT_RESOLVERS | ||
|
|
||
| context_map: Dict[str, 'SerializableByKey'] = {} | ||
|
|
||
| def obj_hook(x): | ||
| return _cirq_object_hook(x, resolvers, context_map) | ||
| obj_hook = ObjectHook(resolvers) | ||
|
|
||
| if json_text is not None: | ||
| return json.loads(json_text, object_hook=obj_hook) | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't be necessary - `SerializableByKey` wraps `SupportsJSON` without modification, so the `hasattr` check above already confirms that `o` is `SerializableByKey`.
This also suggests that `SerializableByKey` can be entirely replaced by `SupportsJSON`, although that may be more effort than it's worth. I suspect that in early iterations `SerializableByKey` had some functionality, but I removed it before merging without removing the class itself.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's true that `SerializableByKey` doesn't add any methods, but it still serves as a marker for types that want to opt in to this "deduplication" during serialization. One thing that is being added here is that we now require that `SerializableByKey` classes must be hashable, which is not necessarily the case with all classes that implement `SupportsJSON`. If we want to change the opt-in mechanism that would be fine with me; I do think the name `SerializableByKey` is a bit misleading (and has been since #3673 when the `_serialization_key_` method was removed) so at the very least we might consider changing the name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good call. Renaming feels like it would be a headache to me, but given that this changes the serialization format anyways I guess now would be the time for it.
Happy to review any changes from this here or as a separate PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the `SerializableByKey` name itself no longer appears in the serialized format, renaming will be a source-only change and would not affect compatibility of deserializing stored data, so that should be low risk. Nevertheless, since naming always invites bikeshedding, I'm inclined to defer that to a future PR, so I'll leave this as-is for now :-)