From 43a9885f59792338948bcb4645b02d170a6a9cd4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 1 Mar 2025 19:33:45 +0000 Subject: [PATCH] Revert "GH-116380: Speed up `glob.[i]glob()` by making fewer system calls. (#116392)" This broke tests on the 'aarch64 Fedora Stable Clang Installed 3.x' and 'AMD64 Fedora Stable Clang Installed 3.x' build bots. This reverts commit da4899b94a9a9083fed4972b2473546e0d997727. --- Doc/library/glob.rst | 18 +- Doc/whatsnew/3.14.rst | 8 - Lib/glob.py | 372 ++++++++++-------- Lib/pathlib/_abc.py | 3 +- Lib/pathlib/_local.py | 9 +- Lib/test/test_glob.py | 57 +-- ...-03-05-23-08-11.gh-issue-116380.56HU7I.rst | 2 - 7 files changed, 229 insertions(+), 240 deletions(-) delete mode 100644 Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 849316553e408a..684466d354aef8 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -75,6 +75,10 @@ The :mod:`glob` module defines the following functions: Using the "``**``" pattern in large directory trees may consume an inordinate amount of time. + .. note:: + This function may return duplicate path names if *pathname* + contains multiple "``**``" patterns and *recursive* is true. + .. versionchanged:: 3.5 Support for recursive globs using "``**``". @@ -84,11 +88,6 @@ The :mod:`glob` module defines the following functions: .. versionchanged:: 3.11 Added the *include_hidden* parameter. - .. versionchanged:: 3.14 - Matching path names are returned only once. In previous versions, this - function may return duplicate path names if *pathname* contains multiple - "``**``" patterns and *recursive* is true. - .. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \ include_hidden=False) @@ -99,6 +98,10 @@ The :mod:`glob` module defines the following functions: .. audit-event:: glob.glob pathname,recursive glob.iglob .. audit-event:: glob.glob/2 pathname,recursive,root_dir,dir_fd glob.iglob + .. note:: + This function may return duplicate path names if *pathname* + contains multiple "``**``" patterns and *recursive* is true. + .. versionchanged:: 3.5 Support for recursive globs using "``**``". @@ -108,11 +111,6 @@ The :mod:`glob` module defines the following functions: .. versionchanged:: 3.11 Added the *include_hidden* parameter. - .. versionchanged:: 3.14 - Matching path names are yielded only once. In previous versions, this - function may yield duplicate path names if *pathname* contains multiple - "``**``" patterns and *recursive* is true. - .. function:: escape(pathname) diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 3c876a193fad32..dbd59a9d7be150 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -968,14 +968,6 @@ base64 (Contributed by Bénédikt Tran, Chris Markiewicz, and Adam Turner in :gh:`118761`.) -glob ----- - -* Reduce the number of system calls in :func:`glob.glob` and :func:`~glob.iglob`, - thereby improving the speed of globbing operations by 20-80%. - (Contributed by Barney Gale in :gh:`116380`.) - - io --- * :mod:`io` which provides the built-in :func:`open` makes less system calls diff --git a/Lib/glob.py b/Lib/glob.py index 7ac497d4f0075a..d1a6dddeeb1610 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -1,10 +1,13 @@ """Filename globbing utility.""" +import contextlib import os import re import fnmatch import functools +import itertools import operator +import stat import sys @@ -42,35 +45,82 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, """ sys.audit("glob.glob", pathname, recursive) sys.audit("glob.glob/2", pathname, recursive, root_dir, dir_fd) - pathname = os.fspath(pathname) - if isinstance(pathname, bytes): - pathname = os.fsdecode(pathname) - if root_dir is not None: - root_dir = os.fsdecode(root_dir) - paths = _iglob(pathname, root_dir, dir_fd, recursive, include_hidden) - return map(os.fsencode, paths) + if root_dir is not None: + root_dir = os.fspath(root_dir) + else: + root_dir = pathname[:0] + it = _iglob(pathname, root_dir, dir_fd, recursive, False, + include_hidden=include_hidden) + if not pathname or recursive and _isrecursive(pathname[:2]): + try: + s = next(it) # skip empty string + if s: + it = itertools.chain((s,), it) + except StopIteration: + pass + return it + +def _iglob(pathname, root_dir, dir_fd, recursive, dironly, + include_hidden=False): + dirname, basename = os.path.split(pathname) + if not has_magic(pathname): + assert not dironly + if basename: + if _lexists(_join(root_dir, pathname), dir_fd): + yield pathname + else: + # Patterns ending with a slash should match only directories + if _isdir(_join(root_dir, dirname), dir_fd): + yield pathname + return + if not dirname: + if recursive and _isrecursive(basename): + yield from _glob2(root_dir, basename, dir_fd, dironly, + include_hidden=include_hidden) + else: + yield from _glob1(root_dir, basename, dir_fd, dironly, + include_hidden=include_hidden) + return + # `os.path.split()` returns the argument itself as a dirname if it is a + # drive or UNC path. Prevent an infinite recursion if a drive or UNC path + # contains magic characters (i.e. r'\\?\C:'). + if dirname != pathname and has_magic(dirname): + dirs = _iglob(dirname, root_dir, dir_fd, recursive, True, + include_hidden=include_hidden) + else: + dirs = [dirname] + if has_magic(basename): + if recursive and _isrecursive(basename): + glob_in_dir = _glob2 + else: + glob_in_dir = _glob1 else: - return _iglob(pathname, root_dir, dir_fd, recursive, include_hidden) - -def _iglob(pathname, root_dir, dir_fd, recursive, include_hidden): - if os.path.altsep: - pathname = pathname.replace(os.path.altsep, os.path.sep) - drive, root, tail = os.path.splitroot(pathname) - parts = tail.split(os.path.sep)[::-1] if tail else [] - globber = _StringGlobber(recursive=recursive, include_hidden=include_hidden) - select = globber.selector(parts) - if drive: - root = drive + root - return select(root, dir_fd, root) - elif root: - return select(root, dir_fd, root, exists=True) - elif not root_dir: - return select(root, dir_fd, root, empty=True) + glob_in_dir = _glob0 + for dirname in dirs: + for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly, + include_hidden=include_hidden): + yield os.path.join(dirname, name) + +# These 2 helper functions non-recursively glob inside a literal directory. +# They return a list of basenames. _glob1 accepts a pattern while _glob0 +# takes a literal basename (so it only has to check for its existence). + +def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False): + names = _listdir(dirname, dir_fd, dironly) + if not (include_hidden or _ishidden(pattern)): + names = (x for x in names if not _ishidden(x)) + return fnmatch.filter(names, pattern) + +def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False): + if basename: + if _lexists(_join(dirname, basename), dir_fd): + return [basename] else: - root = os.path.join(root_dir, '') - root_len = len(root) - paths = select(root, dir_fd, root, empty=True) - return (path[root_len:] for path in paths) + # `os.path.split()` returns an empty basename for paths ending with a + # directory separator. 'q*x/' should match only directories. + if _isdir(dirname, dir_fd): + return [basename] + return [] _deprecated_function_message = ( "{name} is deprecated and will be removed in Python {remove}. Use " @@ -80,16 +130,102 @@ def _iglob(pathname, root_dir, dir_fd, recursive, include_hidden): def glob0(dirname, pattern): import warnings warnings._deprecated("glob.glob0", _deprecated_function_message, remove=(3, 15)) - dirname = os.path.join(dirname, '') - select = _StringGlobber().literal_selector(pattern, []) - return [path[len(dirname):] for path in select(dirname)] + return _glob0(dirname, pattern, None, False) def glob1(dirname, pattern): import warnings warnings._deprecated("glob.glob1", _deprecated_function_message, remove=(3, 15)) - dirname = os.path.join(dirname, '') - select = _StringGlobber().wildcard_selector(pattern, []) - return [path[len(dirname):] for path in select(dirname)] + return _glob1(dirname, pattern, None, False) + +# This helper function recursively yields relative pathnames inside a literal +# directory. + +def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False): + assert _isrecursive(pattern) + if not dirname or _isdir(dirname, dir_fd): + yield pattern[:0] + yield from _rlistdir(dirname, dir_fd, dironly, + include_hidden=include_hidden) + +# If dironly is false, yields all file names inside a directory. +# If dironly is true, yields only directory names. +def _iterdir(dirname, dir_fd, dironly): + try: + fd = None + fsencode = None + if dir_fd is not None: + if dirname: + fd = arg = os.open(dirname, _dir_open_flags, dir_fd=dir_fd) + else: + arg = dir_fd + if isinstance(dirname, bytes): + fsencode = os.fsencode + elif dirname: + arg = dirname + elif isinstance(dirname, bytes): + arg = bytes(os.curdir, 'ASCII') + else: + arg = os.curdir + try: + with os.scandir(arg) as it: + for entry in it: + try: + if not dironly or entry.is_dir(): + if fsencode is not None: + yield fsencode(entry.name) + else: + yield entry.name + except OSError: + pass + finally: + if fd is not None: + os.close(fd) + except OSError: + return + +def _listdir(dirname, dir_fd, dironly): + with contextlib.closing(_iterdir(dirname, dir_fd, dironly)) as it: + return list(it) + +# Recursively yields relative pathnames inside a literal directory. +def _rlistdir(dirname, dir_fd, dironly, include_hidden=False): + names = _listdir(dirname, dir_fd, dironly) + for x in names: + if include_hidden or not _ishidden(x): + yield x + path = _join(dirname, x) if dirname else x + for y in _rlistdir(path, dir_fd, dironly, + include_hidden=include_hidden): + yield _join(x, y) + + +def _lexists(pathname, dir_fd): + # Same as os.path.lexists(), but with dir_fd + if dir_fd is None: + return os.path.lexists(pathname) + try: + os.lstat(pathname, dir_fd=dir_fd) + except (OSError, ValueError): + return False + else: + return True + +def _isdir(pathname, dir_fd): + # Same as os.path.isdir(), but with dir_fd + if dir_fd is None: + return os.path.isdir(pathname) + try: + st = os.stat(pathname, dir_fd=dir_fd) + except (OSError, ValueError): + return False + else: + return stat.S_ISDIR(st.st_mode) + +def _join(dirname, basename): + # It is common if dirname or basename is empty + if not dirname or not basename: + return dirname or basename + return os.path.join(dirname, basename) magic_check = re.compile('([*?[])') magic_check_bytes = re.compile(b'([*?[])') @@ -101,6 +237,15 @@ def has_magic(s): match = magic_check.search(s) return match is not None +def _ishidden(path): + return path[0] in ('.', b'.'[0]) + +def _isrecursive(pattern): + if isinstance(pattern, bytes): + return pattern == b'**' + else: + return pattern == '**' + def escape(pathname): """Escape all special characters. """ @@ -174,13 +319,12 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None): return fr'(?s:{res})\Z' -@functools.lru_cache(maxsize=1024) -def _compile_pattern(pat, sep, case_sensitive, recursive, include_hidden): +@functools.lru_cache(maxsize=512) +def _compile_pattern(pat, sep, case_sensitive, recursive=True): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = translate(pat, recursive=recursive, - include_hidden=include_hidden, seps=sep) + regex = translate(pat, recursive=recursive, include_hidden=True, seps=sep) return re.compile(regex, flags=flags).match @@ -188,13 +332,11 @@ class _GlobberBase: """Abstract class providing shell-style pattern matching and globbing. """ - def __init__(self, sep=os.path.sep, case_sensitive=os.name != 'nt', - case_pedantic=False, recursive=False, include_hidden=False): + def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False): self.sep = sep self.case_sensitive = case_sensitive self.case_pedantic = case_pedantic self.recursive = recursive - self.include_hidden = include_hidden # Abstract methods @@ -204,38 +346,12 @@ def lexists(path): """ raise NotImplementedError - @staticmethod - def lstat(path, dir_fd=None): - """Implements os.lstat() - """ - raise NotImplementedError - - @staticmethod - def open(path, flags, dir_fd=None): - """Implements os.open() - """ - raise NotImplementedError - @staticmethod def scandir(path): """Like os.scandir(), but generates (entry, name, path) tuples. """ raise NotImplementedError - @staticmethod - def scandir_cwd(): - raise NotImplementedError - - @staticmethod - def scandir_fd(fd, prefix): - raise NotImplementedError - - @staticmethod - def close(fd): - """Implements os.close(). - """ - raise NotImplementedError - @staticmethod def concat_path(path, text): """Implements path concatenation. @@ -245,8 +361,7 @@ def concat_path(path, text): # High-level methods def compile(self, pat): - return _compile_pattern(pat, self.sep, self.case_sensitive, - self.recursive, self.include_hidden) + return _compile_pattern(pat, self.sep, self.case_sensitive, self.recursive) def selector(self, parts): """Returns a function that selects from a given path, walking and @@ -271,14 +386,10 @@ def special_selector(self, part, parts): if parts: part += self.sep select_next = self.selector(parts) - if not part: - return select_next - def select_special(path, dir_fd=None, rel_path=None, exists=False, empty=False): + def select_special(path, exists=False): path = self.concat_path(path, part) - if dir_fd is not None: - rel_path = self.concat_path(rel_path, part) - return select_next(path, dir_fd, rel_path, exists) + return select_next(path, exists) return select_special def literal_selector(self, part, parts): @@ -295,11 +406,9 @@ def literal_selector(self, part, parts): select_next = self.selector(parts) - def select_literal(path, dir_fd=None, rel_path=None, exists=False, empty=False): + def select_literal(path, exists=False): path = self.concat_path(path, part) - if dir_fd is not None: - rel_path = self.concat_path(rel_path, part) - return select_next(path, dir_fd, rel_path) + return select_next(path, exists=False) return select_literal def wildcard_selector(self, part, parts): @@ -307,24 +416,14 @@ def wildcard_selector(self, part, parts): filtering by pattern. """ - match = None if self.include_hidden and part == '*' else self.compile(part) + match = None if part == '*' else self.compile(part) dir_only = bool(parts) if dir_only: select_next = self.selector(parts) - def select_wildcard(path, dir_fd=None, rel_path=None, exists=False, empty=False): - close_fd = False + def select_wildcard(path, exists=False): try: - if dir_fd is None: - fd = None - entries = self.scandir(path) if path else self.scandir_cwd() - elif not rel_path: - fd = dir_fd - entries = self.scandir_fd(fd, path) - else: - fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd) - close_fd = True - entries = self.scandir_fd(fd, path) + entries = self.scandir(path) except OSError: pass else: @@ -337,17 +436,9 @@ def select_wildcard(path, dir_fd=None, rel_path=None, exists=False, empty=False) except OSError: continue entry_path = self.concat_path(entry_path, self.sep) - if fd is not None: - entry_name = entry_name + self.sep - yield from select_next( - entry_path, fd, entry_name, exists=True) + yield from select_next(entry_path, exists=True) else: - # Optimization: directly yield the path if this is - # last pattern part. yield entry_path - finally: - if close_fd: - self.close(fd) return select_wildcard def recursive_selector(self, part, parts): @@ -369,49 +460,26 @@ def recursive_selector(self, part, parts): while parts and parts[-1] not in _special_parts: part += self.sep + parts.pop() - match = None if self.include_hidden and part == '**' else self.compile(part) + match = None if part == '**' else self.compile(part) dir_only = bool(parts) select_next = self.selector(parts) - def select_recursive(path, dir_fd=None, rel_path=None, exists=False, empty=False): + def select_recursive(path, exists=False): match_pos = len(str(path)) if match is None or match(str(path), match_pos): - yield from select_next(path, dir_fd, rel_path, exists, empty) - stack = [(path, dir_fd, rel_path)] - try: - while stack: - yield from select_recursive_step(stack, match_pos) - finally: - # Close any file descriptors still on the stack. - while stack: - path, dir_fd, _rel_path = stack.pop() - if path is None: - try: - self.close(dir_fd) - except OSError: - pass + yield from select_next(path, exists) + stack = [path] + while stack: + yield from select_recursive_step(stack, match_pos) def select_recursive_step(stack, match_pos): - path, dir_fd, rel_path = stack.pop() + path = stack.pop() try: - if path is None: - self.close(dir_fd) - return - elif dir_fd is None: - fd = None - entries = self.scandir(path) if path else self.scandir_cwd() - elif not rel_path: - fd = dir_fd - entries = self.scandir_fd(fd, path) - else: - fd = self.open(rel_path, _dir_open_flags, dir_fd=dir_fd) - # Schedule the file descriptor to be closed next step. - stack.append((None, fd, None)) - entries = self.scandir_fd(fd, path) + entries = self.scandir(path) except OSError: pass else: - for entry, entry_name, entry_path in entries: + for entry, _entry_name, entry_path in entries: is_dir = False try: if entry.is_dir(follow_symlinks=follow_symlinks): @@ -423,38 +491,25 @@ def select_recursive_step(stack, match_pos): entry_path_str = str(entry_path) if dir_only: entry_path = self.concat_path(entry_path, self.sep) - if fd is not None: - entry_name = entry_name + self.sep if match is None or match(entry_path_str, match_pos): if dir_only: - yield from select_next( - entry_path, fd, entry_name, exists=True) + yield from select_next(entry_path, exists=True) else: # Optimization: directly yield the path if this is # last pattern part. yield entry_path if is_dir: - stack.append((entry_path, fd, entry_name)) + stack.append(entry_path) return select_recursive - def select_exists(self, path, dir_fd=None, rel_path=None, exists=False, empty=False): - """Yields the given path, if it exists. If *dir_fd* is given, we check - whether *rel_path* exists relative to the fd. + def select_exists(self, path, exists=False): + """Yields the given path, if it exists. """ - if empty: - # Suppress initial path so iglob() doesn't yield the empty string. - pass - elif exists: + if exists: # Optimization: this path is already known to exist, e.g. because # it was returned from os.scandir(), so we skip calling lstat(). yield path - elif dir_fd is not None: - try: - self.lstat(rel_path, dir_fd=dir_fd) - yield path - except OSError: - pass elif self.lexists(path): yield path @@ -463,9 +518,6 @@ class _StringGlobber(_GlobberBase): """Provides shell-style pattern matching and globbing for string paths. """ lexists = staticmethod(os.path.lexists) - lstat = staticmethod(os.lstat) - open = staticmethod(os.open) - close = staticmethod(os.close) concat_path = operator.add @staticmethod @@ -476,20 +528,6 @@ def scandir(path): entries = list(scandir_it) return ((entry, entry.name, entry.path) for entry in entries) - @staticmethod - def scandir_cwd(): - with os.scandir() as scandir_it: - entries = list(scandir_it) - # Suppress leading dot when scanning current directory. - return ((entry, entry.name, entry.name) for entry in entries) - - @staticmethod - def scandir_fd(fd, prefix): - prefix = os.path.join(prefix, prefix[:0]) - with os.scandir(fd) as scandir_it: - entries = list(scandir_it) - return ((entry, entry.name, prefix + entry.name) for entry in entries) - class _PathGlobber(_GlobberBase): """Provides shell-style pattern matching and globbing for pathlib paths. diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index bd2252ef77dcf6..930701d4789f5c 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -196,8 +196,7 @@ def full_match(self, pattern, *, case_sensitive=None): pattern = self.with_segments(pattern) if case_sensitive is None: case_sensitive = self.parser.normcase('Aa') == 'Aa' - globber = _PathGlobber(pattern.parser.sep, case_sensitive, - recursive=True, include_hidden=True) + globber = _PathGlobber(pattern.parser.sep, case_sensitive, recursive=True) match = globber.compile(str(pattern)) return match(str(self)) is not None diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 492bae0b7f7cd8..83da6960f00489 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -558,8 +558,7 @@ def full_match(self, pattern, *, case_sensitive=None): # paths shouldn't match wildcards, so we change it to the empty string. path = str(self) if self.parts else '' pattern = str(pattern) if pattern.parts else '' - globber = _StringGlobber(self.parser.sep, case_sensitive, - recursive=True, include_hidden=True) + globber = _StringGlobber(self.parser.sep, case_sensitive, recursive=True) return globber.compile(pattern)(path) is not None def match(self, path_pattern, *, case_sensitive=None): @@ -581,8 +580,7 @@ def match(self, path_pattern, *, case_sensitive=None): return False if len(path_parts) > len(pattern_parts) and path_pattern.anchor: return False - globber = _StringGlobber(self.parser.sep, case_sensitive, - include_hidden=True) + globber = _StringGlobber(self.parser.sep, case_sensitive) for path_part, pattern_part in zip(path_parts, pattern_parts): match = globber.compile(pattern_part) if match(path_part) is None: @@ -857,8 +855,7 @@ def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=False): case_pedantic = True parts = self._parse_pattern(pattern) recursive = True if recurse_symlinks else _no_recurse_symlinks - globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, - recursive, include_hidden=True) + globber = _StringGlobber(self.parser.sep, case_sensitive, case_pedantic, recursive) select = globber.selector(parts[::-1]) root = str(self) paths = select(self.parser.join(root, '')) diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index 2290e06e60e377..da73769c16e9af 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -4,18 +4,14 @@ import shutil import sys import unittest -import unittest.mock import warnings from test import support -from test.support import is_wasi, Py_DEBUG, infinite_recursion +from test.support import is_wasi, Py_DEBUG from test.support.os_helper import (TESTFN, skip_unless_symlink, can_symlink, create_empty_file, change_cwd) -_supports_dir_fd = {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd - - class GlobTests(unittest.TestCase): dir_fd = None @@ -53,7 +49,7 @@ def setUp(self): def open_dirfd(self): if self.dir_fd is not None: os.close(self.dir_fd) - if _supports_dir_fd: + if {os.open, os.stat} <= os.supports_dir_fd and os.scandir in os.supports_fd: self.dir_fd = os.open(self.tempdir, os.O_RDONLY | os.O_DIRECTORY) else: self.dir_fd = None @@ -181,18 +177,20 @@ def test_glob_directory_with_trailing_slash(self): self.assertEqual(glob.glob(self.norm('Z*Z') + sep), []) self.assertEqual(glob.glob(self.norm('ZZZ') + sep), []) self.assertEqual(glob.glob(self.norm('aaa') + sep), - [self.norm('aaa') + os.sep]) - # Redundant separators are preserved and normalized + [self.norm('aaa') + sep]) + # Preserving the redundant separators is an implementation detail. self.assertEqual(glob.glob(self.norm('aaa') + sep*2), - [self.norm('aaa') + os.sep*2]) + [self.norm('aaa') + sep*2]) # When there is a wildcard pattern which ends with a pathname # separator, glob() doesn't blow. # The result should end with the pathname separator. + # Normalizing the trailing separator is an implementation detail. eq = self.assertSequencesEqual_noorder eq(glob.glob(self.norm('aa*') + sep), [self.norm('aaa') + os.sep, self.norm('aab') + os.sep]) + # Stripping the redundant separators is an implementation detail. eq(glob.glob(self.norm('aa*') + sep*2), - [self.norm('aaa') + os.sep*2, self.norm('aab') + os.sep*2]) + [self.norm('aaa') + os.sep, self.norm('aab') + os.sep]) def test_glob_bytes_directory_with_trailing_slash(self): # Same as test_glob_directory_with_trailing_slash, but with a @@ -202,16 +200,16 @@ def test_glob_bytes_directory_with_trailing_slash(self): self.assertEqual(glob.glob(os.fsencode(self.norm('Z*Z') + sep)), []) self.assertEqual(glob.glob(os.fsencode(self.norm('ZZZ') + sep)), []) self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep)), - [os.fsencode(self.norm('aaa') + os.sep)]) + [os.fsencode(self.norm('aaa') + sep)]) self.assertEqual(glob.glob(os.fsencode(self.norm('aaa') + sep*2)), - [os.fsencode(self.norm('aaa') + os.sep*2)]) + [os.fsencode(self.norm('aaa') + sep*2)]) eq = self.assertSequencesEqual_noorder eq(glob.glob(os.fsencode(self.norm('aa*') + sep)), [os.fsencode(self.norm('aaa') + os.sep), os.fsencode(self.norm('aab') + os.sep)]) eq(glob.glob(os.fsencode(self.norm('aa*') + sep*2)), - [os.fsencode(self.norm('aaa') + os.sep*2), - os.fsencode(self.norm('aab') + os.sep*2)]) + [os.fsencode(self.norm('aaa') + os.sep), + os.fsencode(self.norm('aab') + os.sep)]) @skip_unless_symlink def test_glob_symlinks(self): @@ -328,12 +326,8 @@ def test_recursive_glob(self): with change_cwd(self.tempdir): join = os.path.join eq(glob.glob('**', recursive=True), [join(*i) for i in full]) - eq(glob.glob(join('**', '**'), recursive=True), - [join(*i) for i in full]) eq(glob.glob(join('**', ''), recursive=True), [join(*i) for i in dirs]) - eq(glob.glob(join('**', '**', ''), recursive=True), - [join(*i) for i in dirs]) eq(glob.glob(join('**', '*'), recursive=True), [join(*i) for i in full]) eq(glob.glob(join(os.curdir, '**'), recursive=True), @@ -400,33 +394,6 @@ def test_glob_many_open_files(self): for it in iters: self.assertEqual(next(it), p) - def test_glob_above_recursion_limit(self): - depth = 30 - base = os.path.join(self.tempdir, 'deep') - p = os.path.join(base, *(['d']*depth)) - os.makedirs(p) - pattern = os.path.join(base, '**', 'd') - with infinite_recursion(depth - 5): - glob.glob(pattern, recursive=True) - - @unittest.skipUnless(_supports_dir_fd, "Needs support for iglob(dir_fd=...)") - def test_iglob_iter_close(self): - base = os.path.join(self.tempdir, 'deep') - p = os.path.join(base, *(['d'] * 10)) - os.makedirs(p) - with ( - unittest.mock.patch("glob._StringGlobber.open", wraps=os.open) as os_open, - unittest.mock.patch("glob._StringGlobber.close", wraps=os.close) as os_close - ): - self.assertEqual(os_open.call_count, os_close.call_count) - iter = glob.iglob('**/*/d', dir_fd=self.dir_fd, recursive=True) - self.assertEqual(os_open.call_count, os_close.call_count) - self.assertEqual(next(iter), 'deep/d') - self.assertEqual(next(iter), 'deep/d/d') - self.assertGreater(os_open.call_count, os_close.call_count) - iter.close() - self.assertEqual(os_open.call_count, os_close.call_count) - def test_glob0(self): with self.assertWarns(DeprecationWarning): glob.glob0(self.tempdir, 'a') diff --git a/Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst b/Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst deleted file mode 100644 index b7f27ab7191a96..00000000000000 --- a/Misc/NEWS.d/next/Library/2024-03-05-23-08-11.gh-issue-116380.56HU7I.rst +++ /dev/null @@ -1,2 +0,0 @@ -Speed up :func:`glob.glob` and :func:`glob.iglob` by making use of -:func:`glob.translate` and tracking path existence more precisely.