From 84e456d0efd75f423eb525e5c1fc790b757af4f8 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Wed, 7 Jun 2023 23:27:06 +0100 Subject: [PATCH 1/2] GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999) Joining of arguments is moved to `_load_parts`, which is called when a normalized path is needed. (cherry picked from commit ffeaec7e60c88d585deacb10264ba7a96e5e52df) --- Lib/pathlib.py | 38 ++++++++++++------- ...-05-26-21-24-06.gh-issue-104996.aaW78g.rst | 2 + 2 files changed, 26 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 29517e4c74db1c..d279fd2958b170 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -293,9 +293,9 @@ class PurePath(object): """ __slots__ = ( - # The `_raw_path` slot stores an unnormalized string path. This is set + # The `_raw_paths` slot stores unnormalized string paths. This is set # in the `__init__()` method. - '_raw_path', + '_raw_paths', # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, @@ -352,10 +352,11 @@ def __init__(self, *args): paths = [] for arg in args: if isinstance(arg, PurePath): - path = arg._raw_path if arg._flavour is ntpath and self._flavour is posixpath: # GH-103631: Convert separators for backwards compatibility. 
- path = path.replace('\\', '/') + paths.extend(path.replace('\\', '/') for path in arg._raw_paths) + else: + paths.extend(arg._raw_paths) else: try: path = os.fspath(arg) @@ -366,13 +367,8 @@ def __init__(self, *args): "argument should be a str or an os.PathLike " "object where __fspath__ returns a str, " f"not {type(path).__name__!r}") - paths.append(path) - if len(paths) == 0: - self._raw_path = '' - elif len(paths) == 1: - self._raw_path = paths[0] - else: - self._raw_path = self._flavour.join(*paths) + paths.append(path) + self._raw_paths = paths def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. @@ -402,7 +398,14 @@ def _parse_path(cls, path): return drv, root, parsed def _load_parts(self): - drv, root, tail = self._parse_path(self._raw_path) + paths = self._raw_paths + if len(paths) == 0: + path = '' + elif len(paths) == 1: + path = paths[0] + else: + path = self._flavour.join(*paths) + drv, root, tail = self._parse_path(path) self._drv = drv self._root = root self._tail_cached = tail @@ -733,10 +736,17 @@ def parents(self): def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - # ntpath.isabs() is defective - see GH-44626 . if self._flavour is ntpath: + # ntpath.isabs() is defective - see GH-44626. return bool(self.drive and self.root) - return self._flavour.isabs(self._raw_path) + elif self._flavour is posixpath: + # Optimization: work with raw paths on POSIX. 
+ for path in self._raw_paths: + if path.startswith('/'): + return True + return False + else: + return self._flavour.isabs(str(self)) def is_reserved(self): """Return True if the path contains one of the special names reserved diff --git a/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst new file mode 100644 index 00000000000000..8b81b681af94aa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-05-26-21-24-06.gh-issue-104996.aaW78g.rst @@ -0,0 +1,2 @@ +Improve performance of :class:`pathlib.PurePath` initialisation by +deferring joining of paths when multiple arguments are given. From 0eacac68e362223d76c0d4b64fdb87c16e186a9a Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 7 Jun 2023 23:45:46 +0100 Subject: [PATCH 2/2] [3.12] GH-104996: Defer joining of `pathlib.PurePath()` arguments. (GH-104999) Joining of arguments is moved to `_load_parts`, which is called when a normalized path is needed. (cherry picked from commit ffeaec7e60c88d585deacb10264ba7a96e5e52df) Co-authored-by: Barney Gale