From 492285e30db8141a1078b6f38e3235236ff53410 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 13 Apr 2024 22:33:33 +0100 Subject: [PATCH 1/2] GH-117842: Speed up `pathlib.Path.glob()` on Windows by using `lexists()` Use `os.path.lexists()` rather than `os.lstat()` to test whether paths exist. This is equivalent on POSIX, but faster on Windows. --- Lib/glob.py | 10 +++------- Lib/pathlib/_abc.py | 10 +++++++++- .../2024-04-13-22-23-48.gh-issue-117842.ZGoQqd.rst | 2 ++ 3 files changed, 14 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-13-22-23-48.gh-issue-117842.ZGoQqd.rst diff --git a/Lib/glob.py b/Lib/glob.py index 72cf22299763f0..4843b22e7e2695 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -339,7 +339,7 @@ def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): # Low-level methods - lstat = staticmethod(os.lstat) + lexists = staticmethod(os.path.lexists) scandir = staticmethod(os.scandir) parse_entry = operator.attrgetter('path') concat_path = operator.add @@ -512,12 +512,8 @@ def select_exists(self, path, exists=False): # Optimization: this path is already known to exist, e.g. because # it was returned from os.scandir(), so we skip calling lstat(). yield path - else: - try: - self.lstat(path) - yield path - except OSError: - pass + elif self.lexists(path): + yield path @classmethod def walk(cls, root, top_down, on_error, follow_symlinks): diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b51ad6f46d292a..404568177e1a05 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -44,9 +44,17 @@ def _is_case_sensitive(parser): class Globber(glob._Globber): - lstat = operator.methodcaller('lstat') add_slash = operator.methodcaller('joinpath', '') + @staticmethod + def lexists(path): + # Emulate os.path.lexists(), which never raises OSError. + try: + path.stat(follow_symlinks=False) + except (OSError, ValueError): + return False + return True + @staticmethod def scandir(path): # Emulate os.scandir(), which returns an object that can be used as a diff --git a/Misc/NEWS.d/next/Library/2024-04-13-22-23-48.gh-issue-117842.ZGoQqd.rst b/Misc/NEWS.d/next/Library/2024-04-13-22-23-48.gh-issue-117842.ZGoQqd.rst new file mode 100644 index 00000000000000..3549ae4b9a6567 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-13-22-23-48.gh-issue-117842.ZGoQqd.rst @@ -0,0 +1,2 @@ +Speed up :meth:`pathlib.Path.glob` on Windows by using fast implementation +of :func:`os.path.lexists`. From 6a9a497b68843e882ed5c961a99e171e538742a0 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 15 Apr 2024 19:55:43 +0100 Subject: [PATCH 2/2] Fix comment. --- Lib/glob.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/glob.py b/Lib/glob.py index 4843b22e7e2695..4d59ce9a995367 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -510,7 +510,7 @@ def select_exists(self, path, exists=False): """ if exists: # Optimization: this path is already known to exist, e.g. because - # it was returned from os.scandir(), so we skip calling lstat(). + # it was returned from os.scandir(), so we skip calling lexists(). yield path elif self.lexists(path): yield path