From 037a71712f159301e43b443ba72b898be53d052f Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Fri, 16 Dec 2022 02:40:27 +0400 Subject: [PATCH 01/15] Replace recursion with iteration --- Lib/pathlib.py | 72 ++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f31eb3010368d5..195f008166445f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -19,6 +19,7 @@ from operator import attrgetter from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes +from collections import deque __all__ = [ @@ -1282,42 +1283,51 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): return self._walk(top_down, on_error, follow_symlinks) def _walk(self, top_down, on_error, follow_symlinks): - # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk - # always suppressed the exception then, rather than blow up for a - # minor reason when (say) a thousand readable directories are still - # left to visit. That logic is copied here. - try: - scandir_it = self._scandir() - except OSError as error: - if on_error is not None: - on_error(error) - return + stack: ... = deque(((False, self),)) - with scandir_it: - dirnames = [] - filenames = [] - for entry in scandir_it: - try: - is_dir = entry.is_dir(follow_symlinks=follow_symlinks) - except OSError: - # Carried over from os.path.isdir(). - is_dir = False + while stack: + is_result, top = stack.pop() + if is_result: + yield top + continue - if is_dir: - dirnames.append(entry.name) - else: - filenames.append(entry.name) - if top_down: - yield self, dirnames, filenames + # We may not have read permission for self, in which case we can't + # get a list of the files the directory contains. os.walk + # always suppressed the exception then, rather than blow up for a + # minor reason when (say) a thousand readable directories are still + # left to visit. That logic is copied here. + try: + scandir_it = self._scandir() + except OSError as error: + if on_error is not None: + on_error(error) + continue + + with scandir_it: + dirnames = [] + filenames = [] + for entry in scandir_it: + try: + is_dir = entry.is_dir(follow_symlinks=follow_symlinks) + except OSError: + # Carried over from os.path.isdir(). + is_dir = False + + if is_dir: + dirnames.append(entry.name) + else: + filenames.append(entry.name) + + if top_down: + stack.append((True, (top, dirnames, filenames))) - for dirname in dirnames: - dirpath = self._make_child_relpath(dirname) - yield from dirpath._walk(top_down, on_error, follow_symlinks) + for dirname in dirnames: + dirpath = self._make_child_relpath(dirname) + stack.append((False, dirpath)) - if not top_down: - yield self, dirnames, filenames + if not top_down: + stack.append((True, (top, dirnames, filenames))) class PosixPath(Path, PurePosixPath): From f8968b647f07b7826c2d073d3007220f96bcef37 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Fri, 16 Dec 2022 02:51:32 +0400 Subject: [PATCH 02/15] Unify walk and _walk methods --- Lib/pathlib.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 195f008166445f..4172a9a3336571 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1280,9 +1280,6 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - return self._walk(top_down, on_error, follow_symlinks) - - def _walk(self, top_down, on_error, follow_symlinks): stack: ... = deque(((False, self),)) while stack: @@ -1291,7 +1288,6 @@ def _walk(self, top_down, on_error, follow_symlinks): yield top continue - # We may not have read permission for self, in which case we can't # get a list of the files the directory contains. os.walk # always suppressed the exception then, rather than blow up for a From ecabf8c99a361488935e7d49555d1199a3b50261 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Fri, 16 Dec 2022 03:10:29 +0400 Subject: [PATCH 03/15] Remove empty typehint --- Lib/pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 4172a9a3336571..c2a12067d1be03 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1280,7 +1280,7 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - stack: ... = deque(((False, self),)) + stack = deque(((False, self),)) while stack: is_result, top = stack.pop() From 6ec3c3e084151d6923cb45fca401bbd53b07c656 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Fri, 16 Dec 2022 13:28:05 +0400 Subject: [PATCH 04/15] Fix all tests --- Lib/pathlib.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c2a12067d1be03..1e1046b943fa6f 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1283,8 +1283,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): stack = deque(((False, self),)) while stack: - is_result, top = stack.pop() - if is_result: + must_be_yielded_immediately, top = stack.pop() + if must_be_yielded_immediately: yield top continue @@ -1294,7 +1294,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): # minor reason when (say) a thousand readable directories are still # left to visit. That logic is copied here. try: - scandir_it = self._scandir() + scandir_it = top._scandir() except OSError as error: if on_error is not None: on_error(error) @@ -1316,15 +1316,14 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): filenames.append(entry.name) if top_down: + yield top, dirnames, filenames + else: stack.append((True, (top, dirnames, filenames))) - for dirname in dirnames: - dirpath = self._make_child_relpath(dirname) + for dirname in reversed(dirnames): + dirpath = top._make_child_relpath(dirname) stack.append((False, dirpath)) - if not top_down: - stack.append((True, (top, dirnames, filenames))) - class PosixPath(Path, PurePosixPath): """Path subclass for non-Windows systems. From fb5a4c7af1331a9f916032f6fa15779ec2cacc70 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Fri, 16 Dec 2022 14:19:50 +0400 Subject: [PATCH 05/15] Add test for recursion limit --- Lib/test/test_pathlib.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 1d01d3cbd91d14..a5aebba7a39907 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -13,6 +13,7 @@ from unittest import mock from test.support import import_helper +from test.support import infinite_recursion from test.support import is_emscripten, is_wasi from test.support import os_helper from test.support.os_helper import TESTFN, FakePath @@ -2766,6 +2767,15 @@ def test_walk_many_open_files(self): self.assertEqual(next(it), expected) path = path / 'd' + def test_walk_above_recursion_limit(self): + base = pathlib.Path(os_helper.TESTFN, 'deep') + path = pathlib.Path(base, *(['d']*50)) + path.mkdir(parents=True) + + with infinite_recursion(25): + list(base.walk()) + list(base.walk(top_down=False)) + class PathTest(_BasePathTest, unittest.TestCase): cls = pathlib.Path From 55beb6ef59acedac5aab020ba7e8f4238d749ce9 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 16 Dec 2022 10:28:00 +0000 Subject: [PATCH 06/15] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst diff --git a/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst new file mode 100644 index 00000000000000..f9ac1475dceb00 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-12-16-10-27-58.gh-issue-89727.y64ZLM.rst @@ -0,0 +1 @@ +Fix pathlib.Path.walk RecursionError on deep directory trees by rewriting it using iteration instead of recursion. From 508bd4df5db39a12cf70ff959c3d1262b32c0a0c Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Fri, 16 Dec 2022 15:01:06 +0400 Subject: [PATCH 07/15] Fix recursion limit issue in different envs --- Lib/test/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index a5aebba7a39907..81a823dedd73ea 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2772,7 +2772,7 @@ def test_walk_above_recursion_limit(self): path = pathlib.Path(base, *(['d']*50)) path.mkdir(parents=True) - with infinite_recursion(25): + with infinite_recursion(40): list(base.walk()) list(base.walk(top_down=False)) From 20476ffca31adb6586a6d4dd9f98c564562f4fd5 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sat, 17 Dec 2022 21:32:01 +0400 Subject: [PATCH 08/15] Make everything consistent with os.walk --- Lib/pathlib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1e1046b943fa6f..f62fc5a94a11f2 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1280,11 +1280,11 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - stack = deque(((False, self),)) + stack = [(False, self)] while stack: - must_be_yielded_immediately, top = stack.pop() - if must_be_yielded_immediately: + must_yield, top = stack.pop() + if must_yield: yield top continue From 4f2de627040f6c5b7fec5088d1c1be556cada890 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Tue, 20 Dec 2022 15:54:06 +0400 Subject: [PATCH 09/15] Remove unused import --- Lib/pathlib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f62fc5a94a11f2..599ed661961e72 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -19,7 +19,6 @@ from operator import attrgetter from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from urllib.parse import quote_from_bytes as urlquote_from_bytes -from collections import deque __all__ = [ From 17497f06ffd33056e3f27e4bc2a3dea73cd7c4ca Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Wed, 21 Dec 2022 01:49:29 +0400 Subject: [PATCH 10/15] Replace the usage of infinite_recursion with set_recursion_limit --- Lib/test/test_pathlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 32747cc6b63e31..fd24b712dc365d 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -13,7 +13,7 @@ from unittest import mock from test.support import import_helper -from test.support import infinite_recursion +from test.support import set_recursion_limit from test.support import is_emscripten, is_wasi from test.support import os_helper from test.support.os_helper import TESTFN, FakePath @@ -2774,7 +2774,7 @@ def test_walk_above_recursion_limit(self): path = pathlib.Path(base, *(['d']*50)) path.mkdir(parents=True) - with infinite_recursion(40): + with set_recursion_limit(40): list(base.walk()) list(base.walk(top_down=False)) From 5609e4ff73cc916cc1c3f3a6aada0f4afa84eae5 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 1 Jan 2023 17:57:14 +0400 Subject: [PATCH 11/15] Code review fixes --- Lib/pathlib.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index fbee40473f5d95..f1aef801af8553 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1214,12 +1214,12 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - stack = [(False, self)] + stack = [self] while stack: - must_yield, top = stack.pop() - if must_yield: - yield top + path = stack.pop() + if isinstance(path, tuple): + yield path continue # We may not have read permission for self, in which case we can't @@ -1228,7 +1228,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): # minor reason when (say) a thousand readable directories are still # left to visit. That logic is copied here. try: - scandir_it = top._scandir() + scandir_it = path._scandir() except OSError as error: if on_error is not None: on_error(error) @@ -1250,13 +1250,13 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): filenames.append(entry.name) if top_down: - yield top, dirnames, filenames + yield path, dirnames, filenames else: - stack.append((True, (top, dirnames, filenames))) + stack.append((path, dirnames, filenames)) for dirname in reversed(dirnames): - dirpath = top._make_child_relpath(dirname) - stack.append((False, dirpath)) + dirpath = path._make_child_relpath(dirname) + stack.append(dirpath) class PosixPath(Path, PurePosixPath): From b7d36de468c786257340ca44bab3f1a56d43014d Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Mon, 2 Jan 2023 23:57:35 +0400 Subject: [PATCH 12/15] Update Lib/pathlib.py Co-authored-by: Barney Gale --- Lib/pathlib.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f1aef801af8553..e97e7164ae1ac2 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1254,9 +1254,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): else: stack.append((path, dirnames, filenames)) - for dirname in reversed(dirnames): - dirpath = path._make_child_relpath(dirname) - stack.append(dirpath) + stack += [path._make_child_relpath(dirname) + for dirname in reversed(dirnames)] class PosixPath(Path, PurePosixPath): From d428a8923b69579d9fe629ee47bb72f1cc832359 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Wed, 4 Jan 2023 16:34:51 +0400 Subject: [PATCH 13/15] Code review fixes --- Lib/pathlib.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e97e7164ae1ac2..d43809cb95bcbc 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1254,8 +1254,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): else: stack.append((path, dirnames, filenames)) - stack += [path._make_child_relpath(dirname) - for dirname in reversed(dirnames)] + stack += [path._make_child_relpath(d) for d in reversed(dirnames)] class PosixPath(Path, PurePosixPath): From 49d423711d8d5dff4cede4e70be167056bce7727 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sat, 21 Jan 2023 05:05:42 +0400 Subject: [PATCH 14/15] Apply suggestions from code review Co-authored-by: Brett Cannon --- Lib/pathlib.py | 8 ++++---- Lib/test/test_pathlib.py | 7 +++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 6f17e8e6272b59..cffd72ca6ca7a1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1225,10 +1225,10 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): continue # We may not have read permission for self, in which case we can't - # get a list of the files the directory contains. os.walk - # always suppressed the exception then, rather than blow up for a - # minor reason when (say) a thousand readable directories are still - # left to visit. That logic is copied here. + # get a list of the files the directory contains. os.walk() + # always suppressed the exception in that instance, rather than + # blow up for a minor reason when (say) a thousand readable + # directories are still left to visit. That logic is copied here. try: scandir_it = path._scandir() except OSError as error: diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index dd13e8be062678..e356dff9f4d6f8 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2771,11 +2771,14 @@ def test_walk_many_open_files(self): path = path / 'd' def test_walk_above_recursion_limit(self): + recursion_limit = 40 + # directory_depth > recursion_limit + directory_depth = recursion_limit + 10 base = pathlib.Path(os_helper.TESTFN, 'deep') - path = pathlib.Path(base, *(['d']*50)) + path = pathlib.Path(base, *(['d'] * directory_depth)) path.mkdir(parents=True) - with set_recursion_limit(40): + with set_recursion_limit(recursion_limit): list(base.walk()) list(base.walk(top_down=False)) From 9bc2500855546a1cb4b83bedb75b02da1668c3d9 Mon Sep 17 00:00:00 2001 From: Stanislav Zmiev Date: Sun, 19 Mar 2023 20:09:13 +0400 Subject: [PATCH 15/15] Rename stack to paths --- Lib/pathlib.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index cffd72ca6ca7a1..374aa2d7b13714 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1216,10 +1216,10 @@ def expanduser(self): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) - stack = [self] + paths = [self] - while stack: - path = stack.pop() + while paths: + path = paths.pop() if isinstance(path, tuple): yield path continue @@ -1254,9 +1254,9 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): if top_down: yield path, dirnames, filenames else: - stack.append((path, dirnames, filenames)) + paths.append((path, dirnames, filenames)) - stack += [path._make_child_relpath(d) for d in reversed(dirnames)] + paths += [path._make_child_relpath(d) for d in reversed(dirnames)] class PosixPath(Path, PurePosixPath):