From 0af5e1d98dde9dea91b7acad7e8043ea02dbab1b Mon Sep 17 00:00:00 2001 From: davidcellis Date: Mon, 20 Mar 2023 17:41:18 +0000 Subject: [PATCH 01/13] Special case 'atomic' types that are not deepcopied --- Lib/dataclasses.py | 69 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 13 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 82b08fc017884f..ef32f64e2a1f4f 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -222,6 +222,25 @@ def __repr__(self): # https://bugs.python.org/issue33453 for details. _MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)') +_ATOMIC_TYPES = { + types.NoneType, + types.EllipsisType, + types.NotImplementedType, + int, + float, + bool, + complex, + bytes, + str, + types.CodeType, + type, + range, + types.BuiltinFunctionType, + types.FunctionType, + # weakref.ref, # weakref is not currently imported by dataclasses directly + property, +} + # This function's logic is copied from "recursive_repr" function in # reprlib module to avoid dependency. def _recursive_repr(user_function): @@ -1295,7 +1314,9 @@ def _asdict_inner(obj, dict_factory): if _is_dataclass_instance(obj): result = [] for f in fields(obj): - value = _asdict_inner(getattr(obj, f.name), dict_factory) + value = getattr(obj, f.name) + if type(value) not in _ATOMIC_TYPES: + value = _asdict_inner(value, dict_factory) result.append((f.name, value)) return dict_factory(result) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): @@ -1318,23 +1339,33 @@ def _asdict_inner(obj, dict_factory): # namedtuples, we could no longer call asdict() on a data # structure where a namedtuple was used as a dict key. - return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) + return type(obj)(*[ + v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory) + for v in obj + ]) elif isinstance(obj, (list, tuple)): # Assume we can create an object of this type by passing in a # generator (which is not true for namedtuples, handled # above). - return type(obj)(_asdict_inner(v, dict_factory) for v in obj) + return type(obj)( + v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory) + for v in obj + ) elif isinstance(obj, dict): if hasattr(type(obj), 'default_factory'): # obj is a defaultdict, which has a different constructor from # dict as it requires the default_factory as its first arg. result = type(obj)(getattr(obj, 'default_factory')) for k, v in obj.items(): - result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) + k = k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory) + v = v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory) + result[k] = v return result - return type(obj)((_asdict_inner(k, dict_factory), - _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + + return type(obj)( + (k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory), + v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory)) + for k, v in obj.items()) else: return copy.deepcopy(obj) @@ -1367,7 +1398,9 @@ def _astuple_inner(obj, tuple_factory): if _is_dataclass_instance(obj): result = [] for f in fields(obj): - value = _astuple_inner(getattr(obj, f.name), tuple_factory) + value = getattr(obj, f.name) + if type(value) not in _ATOMIC_TYPES: + value = _astuple_inner(value, tuple_factory) result.append(value) return tuple_factory(result) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): @@ -1377,12 +1410,18 @@ def _astuple_inner(obj, tuple_factory): # treated (see below), but we just need to create them # differently because a namedtuple's __init__ needs to be # called differently (see bpo-34363). - return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj]) + return type(obj)(*[ + v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory) + for v in obj + ]) elif isinstance(obj, (list, tuple)): # Assume we can create an object of this type by passing in a # generator (which is not true for namedtuples, handled # above). - return type(obj)(_astuple_inner(v, tuple_factory) for v in obj) + return type(obj)( + v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory) + for v in obj + ) elif isinstance(obj, dict): obj_type = type(obj) if hasattr(obj_type, 'default_factory'): @@ -1390,10 +1429,14 @@ def _astuple_inner(obj, tuple_factory): # dict as it requires the default_factory as its first arg. result = obj_type(getattr(obj, 'default_factory')) for k, v in obj.items(): - result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory) + k = k if type(k) in _ATOMIC_TYPES else _astuple_inner(k, tuple_factory) + v = v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory) + result[k] = v return result - return obj_type((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory)) - for k, v in obj.items()) + return obj_type( + (k if type(k) in _ATOMIC_TYPES else _astuple_inner(k, tuple_factory), + v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory)) + for k, v in obj.items()) else: return copy.deepcopy(obj) From 2c76ef8cd8fb8faefa7946a65f9ff3b9d09e800c Mon Sep 17 00:00:00 2001 From: davidcellis Date: Mon, 20 Mar 2023 17:41:39 +0000 Subject: [PATCH 02/13] Special case dict_factory=dict --- Lib/dataclasses.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index ef32f64e2a1f4f..34ef5411fa1a32 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1312,13 +1312,22 @@ class C: def _asdict_inner(obj, dict_factory): if _is_dataclass_instance(obj): - result = [] - for f in fields(obj): - value = getattr(obj, f.name) - if type(value) not in _ATOMIC_TYPES: - value = _asdict_inner(value, dict_factory) - result.append((f.name, value)) - return dict_factory(result) + if dict_factory is dict: + result = {} + for f in fields(obj): + value = getattr(obj, f.name) + if type(value) not in _ATOMIC_TYPES: + value = _asdict_inner(value, dict_factory) + result[f.name] = value + return result + else: + result = [] + for f in fields(obj): + value = getattr(obj, f.name) + if type(value) not in _ATOMIC_TYPES: + value = _asdict_inner(value, dict_factory) + result.append((f.name, value)) + return dict_factory(result) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is @@ -1365,7 +1374,7 @@ def _asdict_inner(obj, dict_factory): return type(obj)( (k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory), v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + for k, v in obj.items()) else: return copy.deepcopy(obj) From c58017c5192e4df576de0613aecce4f1cfbb4956 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Mon, 20 Mar 2023 21:10:54 +0000 Subject: [PATCH 03/13] Add comment explaining _ATOMIC_TYPES --- Lib/dataclasses.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 34ef5411fa1a32..50dc38cb63a0ba 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -222,6 +222,8 @@ def __repr__(self): # https://bugs.python.org/issue33453 for details. _MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)') +# Types for which deepcopy(obj) returns obj unmodified +# Used to skip deepcopy in asdict and astuple _ATOMIC_TYPES = { types.NoneType, types.EllipsisType, From 3a650a123546e1776f2a0ca29b37a6f8d6c98d69 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Fri, 24 Mar 2023 10:56:36 +0000 Subject: [PATCH 04/13] Reorder atomic types and clarify intention --- Lib/dataclasses.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index dc1091e989a0b1..76676b98d9ccd1 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -222,25 +222,27 @@ def __repr__(self): # https://bugs.python.org/issue33453 for details. _MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)') -# Types for which deepcopy(obj) returns obj unmodified -# Used to skip deepcopy in asdict and astuple +# Types for which deepcopy(obj) is known to return obj unmodified +# Used to skip deepcopy in asdict and astuple for performance _ATOMIC_TYPES = { + # Common JSON Serializable types types.NoneType, - types.EllipsisType, - types.NotImplementedType, + bool, int, float, - bool, complex, bytes, str, + # Other types that are also unaffected by deepcopy + types.EllipsisType, + types.NotImplementedType, types.CodeType, - type, - range, types.BuiltinFunctionType, types.FunctionType, - # weakref.ref, # weakref is not currently imported by dataclasses directly + type, + range, property, + # weakref.ref, # weakref is not currently imported by dataclasses directly } # This function's logic is copied from "recursive_repr" function in From b6d8a6f7b5f04fe16e0853ce2c95c583cd289493 Mon Sep 17 00:00:00 2001 From: David Ellis Date: Fri, 24 Mar 2023 18:09:05 +0000 Subject: [PATCH 05/13] Update Lib/dataclasses.py Improve comment describing _ATOMIC_TYPES Co-authored-by: Carl Meyer --- Lib/dataclasses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 76676b98d9ccd1..ec22b6c6099ebb 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -222,8 +222,8 @@ def __repr__(self): # https://bugs.python.org/issue33453 for details. _MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)') -# Types for which deepcopy(obj) is known to return obj unmodified -# Used to skip deepcopy in asdict and astuple for performance +# Atomic immutable types which don't require any recursive handling and for which deepcopy +# returns the same object. We can provide a fast-path for these types in asdict and astuple. _ATOMIC_TYPES = { # Common JSON Serializable types types.NoneType, From bf320be0d747672f8b0cc1b27618176b07abf3f9 Mon Sep 17 00:00:00 2001 From: David Ellis Date: Fri, 24 Mar 2023 18:09:35 +0000 Subject: [PATCH 06/13] Update Lib/dataclasses.py Remove comment referring to weakref Co-authored-by: Carl Meyer --- Lib/dataclasses.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index ec22b6c6099ebb..5eded82573f3e1 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -242,7 +242,6 @@ def __repr__(self): type, range, property, - # weakref.ref, # weakref is not currently imported by dataclasses directly } # This function's logic is copied from "recursive_repr" function in From 6280f2ed4949f77b9771f72287fd89a5cc67b938 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Fri, 24 Mar 2023 18:11:16 +0000 Subject: [PATCH 07/13] Convert _ATOMIC_TYPES to frozenset --- Lib/dataclasses.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 5eded82573f3e1..940a91d0c6f1e6 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -224,7 +224,7 @@ def __repr__(self): # Atomic immutable types which don't require any recursive handling and for which deepcopy # returns the same object. We can provide a fast-path for these types in asdict and astuple. -_ATOMIC_TYPES = { +_ATOMIC_TYPES = frozenset({ # Common JSON Serializable types types.NoneType, bool, @@ -242,7 +242,7 @@ def __repr__(self): type, range, property, -} +}) # This function's logic is copied from "recursive_repr" function in # reprlib module to avoid dependency. From 7343fe696dcecffe3297a893edc1cc21e48d0062 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Fri, 24 Mar 2023 18:18:07 +0000 Subject: [PATCH 08/13] Correction - complex and bytes are not JSON serializable (by default by the python stdlib json module). --- Lib/dataclasses.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 940a91d0c6f1e6..4f9dc29b073278 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -230,9 +230,10 @@ def __repr__(self): bool, int, float, + str, + # Other common types complex, bytes, - str, # Other types that are also unaffected by deepcopy types.EllipsisType, types.NotImplementedType, From b77c17ed417bb3621bf970a16a1ae6a88547ae54 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Fri, 24 Mar 2023 18:26:18 +0000 Subject: [PATCH 09/13] Remove dict special case for now --- Lib/dataclasses.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 4f9dc29b073278..4afd9e2be05de2 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1316,22 +1316,13 @@ class C: def _asdict_inner(obj, dict_factory): if _is_dataclass_instance(obj): - if dict_factory is dict: - result = {} - for f in fields(obj): - value = getattr(obj, f.name) - if type(value) not in _ATOMIC_TYPES: - value = _asdict_inner(value, dict_factory) - result[f.name] = value - return result - else: - result = [] - for f in fields(obj): - value = getattr(obj, f.name) - if type(value) not in _ATOMIC_TYPES: - value = _asdict_inner(value, dict_factory) - result.append((f.name, value)) - return dict_factory(result) + result = [] + for f in fields(obj): + value = getattr(obj, f.name) + if type(value) not in _ATOMIC_TYPES: + value = _asdict_inner(value, dict_factory) + result.append((f.name, value)) + return dict_factory(result) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned # object is another namedtuple of the same type. This is From 0209f3cfd25889bc6593daa90e9c4c4bcf412ac0 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Fri, 24 Mar 2023 20:25:25 +0000 Subject: [PATCH 10/13] Revert function call skip for _asdict_inner --- Lib/dataclasses.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 4afd9e2be05de2..58cafb5f9b0449 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1315,12 +1315,12 @@ class C: def _asdict_inner(obj, dict_factory): - if _is_dataclass_instance(obj): + if type(obj) in _ATOMIC_TYPES: + return obj + elif _is_dataclass_instance(obj): result = [] for f in fields(obj): - value = getattr(obj, f.name) - if type(value) not in _ATOMIC_TYPES: - value = _asdict_inner(value, dict_factory) + value = _asdict_inner(getattr(obj, f.name), dict_factory) result.append((f.name, value)) return dict_factory(result) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): @@ -1343,33 +1343,23 @@ def _asdict_inner(obj, dict_factory): # namedtuples, we could no longer call asdict() on a data # structure where a namedtuple was used as a dict key. - return type(obj)(*[ - v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory) - for v in obj - ]) + return type(obj)(*[_asdict_inner(v, dict_factory) for v in obj]) elif isinstance(obj, (list, tuple)): # Assume we can create an object of this type by passing in a # generator (which is not true for namedtuples, handled # above). - return type(obj)( - v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory) - for v in obj - ) + return type(obj)(_asdict_inner(v, dict_factory) for v in obj) elif isinstance(obj, dict): if hasattr(type(obj), 'default_factory'): # obj is a defaultdict, which has a different constructor from # dict as it requires the default_factory as its first arg. result = type(obj)(getattr(obj, 'default_factory')) for k, v in obj.items(): - k = k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory) - v = v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory) - result[k] = v + result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory) return result - - return type(obj)( - (k if type(k) in _ATOMIC_TYPES else _asdict_inner(k, dict_factory), - v if type(v) in _ATOMIC_TYPES else _asdict_inner(v, dict_factory)) - for k, v in obj.items()) + return type(obj)((_asdict_inner(k, dict_factory), + _asdict_inner(v, dict_factory)) + for k, v in obj.items()) else: return copy.deepcopy(obj) From 986371cd9ac48068dad3529ff7c5c53bda009c75 Mon Sep 17 00:00:00 2001 From: davidcellis Date: Fri, 24 Mar 2023 20:29:29 +0000 Subject: [PATCH 11/13] Revert function call skip for _astuple_inner --- Lib/dataclasses.py | 28 +++++++++------------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 58cafb5f9b0449..24c986c528fb99 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1389,12 +1389,12 @@ class C: def _astuple_inner(obj, tuple_factory): - if _is_dataclass_instance(obj): + if type(obj) in _ATOMIC_TYPES: + return obj + elif _is_dataclass_instance(obj): result = [] for f in fields(obj): - value = getattr(obj, f.name) - if type(value) not in _ATOMIC_TYPES: - value = _astuple_inner(value, tuple_factory) + value = _astuple_inner(getattr(obj, f.name), tuple_factory) result.append(value) return tuple_factory(result) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): @@ -1404,18 +1404,12 @@ def _astuple_inner(obj, tuple_factory): # treated (see below), but we just need to create them # differently because a namedtuple's __init__ needs to be # called differently (see bpo-34363). - return type(obj)(*[ - v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory) - for v in obj - ]) + return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj]) elif isinstance(obj, (list, tuple)): # Assume we can create an object of this type by passing in a # generator (which is not true for namedtuples, handled # above). - return type(obj)( - v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory) - for v in obj - ) + return type(obj)(_astuple_inner(v, tuple_factory) for v in obj) elif isinstance(obj, dict): obj_type = type(obj) if hasattr(obj_type, 'default_factory'): @@ -1423,14 +1417,10 @@ def _astuple_inner(obj, tuple_factory): # dict as it requires the default_factory as its first arg. result = obj_type(getattr(obj, 'default_factory')) for k, v in obj.items(): - k = k if type(k) in _ATOMIC_TYPES else _astuple_inner(k, tuple_factory) - v = v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory) - result[k] = v + result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory) return result - return obj_type( - (k if type(k) in _ATOMIC_TYPES else _astuple_inner(k, tuple_factory), - v if type(v) in _ATOMIC_TYPES else _astuple_inner(v, tuple_factory)) - for k, v in obj.items()) + return obj_type((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory)) + for k, v in obj.items()) else: return copy.deepcopy(obj) From 801af8237a448c5c8bfba41221f1970495c8a4be Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 24 Mar 2023 20:49:49 +0000 Subject: [PATCH 12/13] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst diff --git a/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst b/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst new file mode 100644 index 00000000000000..1d6a4edef1f49d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst @@ -0,0 +1 @@ +Performance improvement for :func:`dataclasses.astuple` and :func:`dataclasses.asdict` in cases where the contents are common python types. From 903b2fb8a04d0a070543805da848de03c5c8eefb Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 10 Apr 2023 22:21:45 +0100 Subject: [PATCH 13/13] Update Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst --- .../Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst b/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst index 1d6a4edef1f49d..15f16d9eb4c1bf 100644 --- a/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst +++ b/Misc/NEWS.d/next/Library/2023-03-24-20-49-48.gh-issue-103000.6eVNZI.rst @@ -1 +1,2 @@ -Performance improvement for :func:`dataclasses.astuple` and :func:`dataclasses.asdict` in cases where the contents are common python types. +Improve performance of :func:`dataclasses.astuple` and +:func:`dataclasses.asdict` in cases where the contents are common Python types.