From 1f7d5adcf5b2124b3a896488c0dec38a8224fe40 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 26 May 2024 11:59:54 +0100 Subject: [PATCH] GH-119169: Implement `pathlib.Path.walk()` using `os.walk()` For silly reasons, pathlib's generic implementation of `walk()` currently resides in `glob._Globber`. This commit moves it into `pathlib._abc.PathBase.walk()` where it really belongs, and makes `pathlib.Path.walk()` call through to `os.walk()`. Symlink handling is a little different between the two `walk()` implementations when `followlinks=False`. In `pathlib` it means never following symlinks, not even for distinguishing between files and directories. In `os` it means never *walking* into symlinks, including any symlinks created by the user between iterations. We smooth over these differences with a private sentinel - `os._walk_symlinks_as_files` - that enables the pathlib behaviour. --- Lib/glob.py | 37 ------------------------------------- Lib/os.py | 7 ++++++- Lib/pathlib/_abc.py | 32 +++++++++++++++++++++++++++++++- Lib/pathlib/_local.py | 4 +++- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 920f79ad7e1fe5..fbb1d35aab71fa 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -519,43 +519,6 @@ def select_exists(self, path, exists=False): elif self.lexists(path): yield path - @classmethod - def walk(cls, root, top_down, on_error, follow_symlinks): - """Walk the directory tree from the given root, similar to os.walk(). 
- """ - paths = [root] - while paths: - path = paths.pop() - if isinstance(path, tuple): - yield path - continue - try: - with cls.scandir(path) as scandir_it: - dirnames = [] - filenames = [] - if not top_down: - paths.append((path, dirnames, filenames)) - for entry in scandir_it: - name = entry.name - try: - if entry.is_dir(follow_symlinks=follow_symlinks): - if not top_down: - paths.append(cls.parse_entry(entry)) - dirnames.append(name) - else: - filenames.append(name) - except OSError: - filenames.append(name) - except OSError as error: - if on_error is not None: - on_error(error) - else: - if top_down: - yield path, dirnames, filenames - if dirnames: - prefix = cls.add_slash(path) - paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)] - class _StringGlobber(_Globber): lexists = staticmethod(os.path.lexists) diff --git a/Lib/os.py b/Lib/os.py index 7661ce68ca3be2..5feb17710bab0e 100644 --- a/Lib/os.py +++ b/Lib/os.py @@ -281,6 +281,11 @@ def renames(old, new): __all__.extend(["makedirs", "removedirs", "renames"]) +# Private sentinel that can be passed to os.walk() to classify all symlinks as +# files, and walk into every path classified as a directory (potentially after +# user modification in topdown mode). Used by pathlib.Path.walk(). +_walk_symlinks_as_files = object() + def walk(top, topdown=True, onerror=None, followlinks=False): """Directory tree generator. @@ -382,7 +387,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False): break try: - is_dir = entry.is_dir() + is_dir = entry.is_dir(follow_symlinks=followlinks is not _walk_symlinks_as_files) except OSError: # If is_dir() raises an OSError, consider the entry not to # be a directory, same behaviour as os.path.isdir(). 
diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 3cdbb735096edb..a2b4328a7fdfeb 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -623,7 +623,37 @@ def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True): def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" - return self._globber.walk(self, top_down, on_error, follow_symlinks) + paths = [self] + while paths: + path = paths.pop() + if isinstance(path, tuple): + yield path + continue + dirnames = [] + filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) + try: + for child in path.iterdir(): + try: + if child.is_dir(follow_symlinks=follow_symlinks): + if not top_down: + paths.append(child) + dirnames.append(child.name) + else: + filenames.append(child.name) + except OSError: + filenames.append(child.name) + except OSError as error: + if on_error is not None: + on_error(error) + if not top_down: + while not isinstance(paths.pop(), tuple): + pass + continue + if top_down: + yield path, dirnames, filenames + paths += [path.joinpath(d) for d in reversed(dirnames)] def absolute(self): """Return an absolute version of this path diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index f2776b1d20a2ea..96dfd2b8ce2610 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -638,7 +638,9 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) root_dir = str(self) - results = self._globber.walk(root_dir, top_down, on_error, follow_symlinks) + if not follow_symlinks: + follow_symlinks = os._walk_symlinks_as_files + results = os.walk(root_dir, top_down, on_error, follow_symlinks) for path_str, dirnames, filenames in results: if root_dir == '.': path_str = path_str[2:]