From 1c8bf813007e63930f0f89a980f3b848b1590372 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 20 Jan 2024 18:44:39 +0000 Subject: [PATCH 1/9] GH-73435: Add `pathlib.PurePath.globmatch()` In 49f90ba we added support for the recursive wildcard `**` in `pathlib.PurePath.match()`. This should allow arbitrary prefix and suffix matching, like `p.match('foo/**')` or `p.match('**/foo')`, but there's a problem: for relative patterns only, `match()` implicitly inserts a `**` token on the left hand side, causing all patterns to match from the right. As a result, it's impossible to match relative patterns from the left: `PurePath('foo/bar').match('bar/**')` is true! This commit reverts the changes to `match()`, and instead adds a new `globmatch()` method that: - Supports the recursive wildcard `**` - Matches the *entire* path when given a relative pattern As a result, `globmatch()`'s pattern language exactly matches that of `glob()`. --- Doc/library/glob.rst | 2 +- Doc/library/pathlib.rst | 59 ++++++++------- Doc/whatsnew/3.13.rst | 3 +- Lib/pathlib/_abc.py | 31 ++++++-- Lib/test/test_pathlib/test_pathlib_abc.py | 91 +++++++++++++++++------ 5 files changed, 123 insertions(+), 63 deletions(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 6e4f72c19ff4c9..1f68b369bb5787 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -147,7 +147,7 @@ The :mod:`glob` module defines the following functions: .. seealso:: - :meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods, + :meth:`pathlib.PurePath.globmatch` and :meth:`pathlib.Path.glob` methods, which call this function to implement pattern matching and globbing. .. versionadded:: 3.13 diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index b924f470e0be04..cf87bfbdffd5ba 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -552,55 +552,54 @@ Pure paths provide the following methods and properties: PureWindowsPath('c:/Program Files') -.. method:: PurePath.match(pattern, *, case_sensitive=None) +.. method:: PurePath.globmatch(pattern, *, case_sensitive=None) Match this path against the provided glob-style pattern. Return ``True`` - if matching is successful, ``False`` otherwise. - - If *pattern* is relative, the path can be either relative or absolute, - and matching is done from the right:: + if matching is successful, ``False`` otherwise. For example:: - >>> PurePath('a/b.py').match('*.py') - True - >>> PurePath('/a/b/c.py').match('b/*.py') + >>> PurePath('a/b.py').globmatch('a/*.py') True - >>> PurePath('/a/b/c.py').match('a/*.py') + >>> PurePath('a/b.py').globmatch('*.py') False + >>> PurePath('/a/b/c.py').globmatch('/a/**') + True + >>> PurePath('/a/b/c.py').globmatch('**/*.py') + True - If *pattern* is absolute, the path must be absolute, and the whole path - must match:: + As with other methods, case-sensitivity follows platform defaults:: - >>> PurePath('/a.py').match('/*.py') - True - >>> PurePath('a/b.py').match('/*.py') + >>> PurePosixPath('b.py').globmatch('*.PY') False + >>> PureWindowsPath('b.py').globmatch('*.PY') + True - The *pattern* may be another path object; this speeds up matching the same - pattern against multiple files:: + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. - >>> pattern = PurePath('*.py') - >>> PurePath('a/b.py').match(pattern) - True + .. versionadded:: 3.13 - .. versionchanged:: 3.12 - Accepts an object implementing the :class:`os.PathLike` interface. - As with other methods, case-sensitivity follows platform defaults:: +.. method:: PurePath.match(pattern, *, case_sensitive=None) - >>> PurePosixPath('b.py').match('*.PY') - False - >>> PureWindowsPath('b.py').match('*.PY') + Match this path against the provided non-recursive glob-style pattern. + Return ``True`` if matching is successful, ``False`` otherwise. + + This method is similar to :meth:`~PurePath.globmatch`, but the recursive + wildcard "``**``" is not supported (it acts like non-recursive "``*``"), + and if a relative pattern is given, then matching is done from the right:: + + >>> PurePath('a/b.py').match('*.py') + True + >>> PurePath('/a/b/c.py').match('b/*.py') True + >>> PurePath('/a/b/c.py').match('a/*.py') + False - Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + .. versionchanged:: 3.12 + The *pattern* parameter accepts a :term:`path-like object`. .. versionchanged:: 3.12 The *case_sensitive* parameter was added. - .. versionchanged:: 3.13 - Support for the recursive wildcard "``**``" was added. In previous - versions, it acted like the non-recursive wildcard "``*``". - .. method:: PurePath.relative_to(other, walk_up=False) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 40f0cd37fe9318..2f55d91830a05b 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -336,7 +336,8 @@ pathlib object from a 'file' URI (``file:/``). (Contributed by Barney Gale in :gh:`107465`.) -* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`. +* Add :meth:`pathlib.PurePath.globmatch` for matching paths with + shell-style wildcards, including the recursive wildcard "``**``". (Contributed by Barney Gale in :gh:`73435`.) * Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`, diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e5eeb4afce2ea9..3f5be7da295a66 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -48,7 +48,7 @@ def _is_case_sensitive(pathmod): @functools.lru_cache(maxsize=256) -def _compile_pattern(pat, sep, case_sensitive): +def _compile_pattern(pat, sep, case_sensitive, recursive=True): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" global re, glob @@ -56,7 +56,7 @@ def _compile_pattern(pat, sep, case_sensitive): import re, glob flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep) + regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep) # The string representation of an empty path is a single dot ('.'). Empty # paths shouldn't match wildcards, so we consume it with an atomic group. regex = r'(\.\Z)?+' + regex @@ -450,13 +450,28 @@ def match(self, path_pattern, *, case_sensitive=None): if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep - if path_pattern.anchor: - pattern_str = str(path_pattern) - elif path_pattern.parts: - pattern_str = str('**' / path_pattern) - else: + delta = len(self.parts) - len(path_pattern.parts) + if delta < 0: + return False # Path is too short. + if delta > 0 and path_pattern.anchor: + return False # Path is too long. + if not path_pattern.parts: raise ValueError("empty pattern") - match = _compile_pattern(pattern_str, sep, case_sensitive) + for path, pattern in zip(reversed(self.parts), reversed(path_pattern.parts)): + match = _compile_pattern(pattern, sep, case_sensitive, recursive=False) + if match(path) is None: + return False + return True + + def globmatch(self, pattern, *, case_sensitive=None): + """ + Return True if this path matches the given glob-style pattern. + """ + if not isinstance(pattern, PurePathBase): + pattern = self.with_segments(pattern) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self.pathmod) + match = _compile_pattern(str(pattern), pattern.pathmod.sep, case_sensitive) return match(str(self)) is not None diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 199718a8a69c5a..d4987dfac122d6 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -249,29 +249,8 @@ def test_match_common(self): self.assertFalse(P('/ab.py').match('/a/*.py')) self.assertFalse(P('/a/b/c.py').match('/a/*.py')) # Multi-part glob-style pattern. - self.assertTrue(P('a').match('**')) - self.assertTrue(P('c.py').match('**')) - self.assertTrue(P('a/b/c.py').match('**')) - self.assertTrue(P('/a/b/c.py').match('**')) - self.assertTrue(P('/a/b/c.py').match('/**')) - self.assertTrue(P('/a/b/c.py').match('/a/**')) - self.assertTrue(P('/a/b/c.py').match('**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/**/*.py')) + self.assertFalse(P('/a/b/c.py').match('/**/*.py')) self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py')) - self.assertFalse(P('c.py').match('**/a.py')) - self.assertFalse(P('c.py').match('c/**')) - self.assertFalse(P('a/b/c.py').match('**/a')) - self.assertFalse(P('a/b/c.py').match('**/a/b')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c.')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**')) - self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py')) - self.assertRaises(ValueError, P('a').match, '**a/b/c') - self.assertRaises(ValueError, P('a').match, 'a/b/c**') # Case-sensitive flag self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) @@ -279,9 +258,75 @@ def test_match_common(self): self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) # Matching against empty path self.assertFalse(P('').match('*')) - self.assertTrue(P('').match('**')) + self.assertFalse(P('').match('**')) self.assertFalse(P('').match('**/*')) + def test_globmatch_common(self): + P = self.cls + # Simple relative pattern. + self.assertTrue(P('b.py').globmatch('b.py')) + self.assertFalse(P('a/b.py').globmatch('b.py')) + self.assertFalse(P('/a/b.py').globmatch('b.py')) + self.assertFalse(P('a.py').globmatch('b.py')) + self.assertFalse(P('b/py').globmatch('b.py')) + self.assertFalse(P('/a.py').globmatch('b.py')) + self.assertFalse(P('b.py/c').globmatch('b.py')) + # Wildcard relative pattern. + self.assertTrue(P('b.py').globmatch('*.py')) + self.assertFalse(P('a/b.py').globmatch('*.py')) + self.assertFalse(P('/a/b.py').globmatch('*.py')) + self.assertFalse(P('b.pyc').globmatch('*.py')) + self.assertFalse(P('b./py').globmatch('*.py')) + self.assertFalse(P('b.py/c').globmatch('*.py')) + # Multi-part relative pattern. + self.assertTrue(P('ab/c.py').globmatch('a*/*.py')) + self.assertFalse(P('/d/ab/c.py').globmatch('a*/*.py')) + self.assertFalse(P('a.py').globmatch('a*/*.py')) + self.assertFalse(P('/dab/c.py').globmatch('a*/*.py')) + self.assertFalse(P('ab/c.py/d').globmatch('a*/*.py')) + # Absolute pattern. + self.assertTrue(P('/b.py').globmatch('/*.py')) + self.assertFalse(P('b.py').globmatch('/*.py')) + self.assertFalse(P('a/b.py').globmatch('/*.py')) + self.assertFalse(P('/a/b.py').globmatch('/*.py')) + # Multi-part absolute pattern. + self.assertTrue(P('/a/b.py').globmatch('/a/*.py')) + self.assertFalse(P('/ab.py').globmatch('/a/*.py')) + self.assertFalse(P('/a/b/c.py').globmatch('/a/*.py')) + # Multi-part glob-style pattern. + self.assertTrue(P('a').globmatch('**')) + self.assertTrue(P('c.py').globmatch('**')) + self.assertTrue(P('a/b/c.py').globmatch('**')) + self.assertTrue(P('/a/b/c.py').globmatch('**')) + self.assertTrue(P('/a/b/c.py').globmatch('/**')) + self.assertTrue(P('/a/b/c.py').globmatch('/a/**')) + self.assertTrue(P('/a/b/c.py').globmatch('**/*.py')) + self.assertTrue(P('/a/b/c.py').globmatch('/**/*.py')) + self.assertTrue(P('/a/b/c.py').globmatch('/a/**/*.py')) + self.assertTrue(P('/a/b/c.py').globmatch('/a/b/**/*.py')) + self.assertTrue(P('/a/b/c.py').globmatch('/**/**/**/**/*.py')) + self.assertFalse(P('c.py').globmatch('**/a.py')) + self.assertFalse(P('c.py').globmatch('c/**')) + self.assertFalse(P('a/b/c.py').globmatch('**/a')) + self.assertFalse(P('a/b/c.py').globmatch('**/a/b')) + self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c')) + self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c.')) + self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').globmatch('/a/b/c.py/**')) + self.assertFalse(P('a/b/c.py').globmatch('/**/a/b/c.py')) + self.assertRaises(ValueError, P('a').globmatch, '**a/b/c') + self.assertRaises(ValueError, P('a').globmatch, 'a/b/c**') + # Case-sensitive flag + self.assertFalse(P('A.py').globmatch('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').globmatch('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').globmatch('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').globmatch('/A/*/*.Py', case_sensitive=False)) + # Matching against empty path + self.assertFalse(P('').globmatch('*')) + self.assertTrue(P('').globmatch('**')) + self.assertFalse(P('').globmatch('**/*')) + def test_parts_common(self): # `parts` returns a tuple. sep = self.sep From 7f9222a852307c4eddeb6d474e6110f36f2fc15b Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 20 Jan 2024 19:06:20 +0000 Subject: [PATCH 2/9] Simplify `match()` code slightly. --- Lib/pathlib/_abc.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 3f5be7da295a66..efd9e0e35eee23 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -450,16 +450,17 @@ def match(self, path_pattern, *, case_sensitive=None): if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep - delta = len(self.parts) - len(path_pattern.parts) - if delta < 0: - return False # Path is too short. - if delta > 0 and path_pattern.anchor: - return False # Path is too long. - if not path_pattern.parts: + our_parts = self.parts[::-1] + pat_parts = path_pattern.parts[::-1] + if not pat_parts: raise ValueError("empty pattern") - for path, pattern in zip(reversed(self.parts), reversed(path_pattern.parts)): - match = _compile_pattern(pattern, sep, case_sensitive, recursive=False) - if match(path) is None: + if len(our_parts) < len(pat_parts): + return False + if len(our_parts) > len(pat_parts) and path_pattern.anchor: + return False + for our_part, pat_part in zip(our_parts, pat_parts): + match = _compile_pattern(pat_part, sep, case_sensitive, recursive=False) + if match(our_part) is None: return False return True From ed203e80d8c628e402628155e2362057fcb69062 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 20 Jan 2024 22:24:32 +0000 Subject: [PATCH 3/9] Improve variable names --- Lib/pathlib/_abc.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index efd9e0e35eee23..32ea82dacca42d 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -450,17 +450,17 @@ def match(self, path_pattern, *, case_sensitive=None): if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep - our_parts = self.parts[::-1] - pat_parts = path_pattern.parts[::-1] - if not pat_parts: + path_parts = self.parts[::-1] + pattern_parts = path_pattern.parts[::-1] + if not pattern_parts: raise ValueError("empty pattern") - if len(our_parts) < len(pat_parts): + if len(path_parts) < len(pattern_parts): return False - if len(our_parts) > len(pat_parts) and path_pattern.anchor: + if len(path_parts) > len(pattern_parts) and path_pattern.anchor: return False - for our_part, pat_part in zip(our_parts, pat_parts): - match = _compile_pattern(pat_part, sep, case_sensitive, recursive=False) - if match(our_part) is None: + for path_part, pattern_part in zip(path_parts, pattern_parts): + match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False) + if match(path_part) is None: return False return True From aaa1ddb3b87bb97f4c52c8a2d638f860c66712f5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 22 Jan 2024 22:49:51 +0000 Subject: [PATCH 4/9] `globmatch` --> `full_match` --- Doc/library/glob.rst | 5 +- Doc/library/pathlib.rst | 16 ++-- Doc/whatsnew/3.13.rst | 2 +- Lib/pathlib/_abc.py | 2 +- Lib/test/test_pathlib/test_pathlib_abc.py | 112 +++++++++++----------- 5 files changed, 69 insertions(+), 68 deletions(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 1f68b369bb5787..19a0bbba8966ba 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -147,8 +147,9 @@ The :mod:`glob` module defines the following functions: .. seealso:: - :meth:`pathlib.PurePath.globmatch` and :meth:`pathlib.Path.glob` methods, - which call this function to implement pattern matching and globbing. + :meth:`pathlib.PurePath.full_match` and :meth:`pathlib.Path.glob` + methods, which call this function to implement pattern matching and + globbing. .. versionadded:: 3.13 diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index cf87bfbdffd5ba..1e8e6a93415477 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -552,25 +552,25 @@ Pure paths provide the following methods and properties: PureWindowsPath('c:/Program Files') -.. method:: PurePath.globmatch(pattern, *, case_sensitive=None) +.. method:: PurePath.full_match(pattern, *, case_sensitive=None) Match this path against the provided glob-style pattern. Return ``True`` if matching is successful, ``False`` otherwise. For example:: - >>> PurePath('a/b.py').globmatch('a/*.py') + >>> PurePath('a/b.py').full_match('a/*.py') True - >>> PurePath('a/b.py').globmatch('*.py') + >>> PurePath('a/b.py').full_match('*.py') False - >>> PurePath('/a/b/c.py').globmatch('/a/**') + >>> PurePath('/a/b/c.py').full_match('/a/**') True - >>> PurePath('/a/b/c.py').globmatch('**/*.py') + >>> PurePath('/a/b/c.py').full_match('**/*.py') True As with other methods, case-sensitivity follows platform defaults:: - >>> PurePosixPath('b.py').globmatch('*.PY') + >>> PurePosixPath('b.py').full_match('*.PY') False - >>> PureWindowsPath('b.py').globmatch('*.PY') + >>> PureWindowsPath('b.py').full_match('*.PY') True Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. @@ -583,7 +583,7 @@ Pure paths provide the following methods and properties: Match this path against the provided non-recursive glob-style pattern. Return ``True`` if matching is successful, ``False`` otherwise. - This method is similar to :meth:`~PurePath.globmatch`, but the recursive + This method is similar to :meth:`~PurePath.full_match`, but the recursive wildcard "``**``" is not supported (it acts like non-recursive "``*``"), and if a relative pattern is given, then matching is done from the right:: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 2f55d91830a05b..8c2bb05920d5b6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -336,7 +336,7 @@ pathlib object from a 'file' URI (``file:/``). (Contributed by Barney Gale in :gh:`107465`.) -* Add :meth:`pathlib.PurePath.globmatch` for matching paths with +* Add :meth:`pathlib.PurePath.full_match` for matching paths with shell-style wildcards, including the recursive wildcard "``**``". (Contributed by Barney Gale in :gh:`73435`.) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 32ea82dacca42d..8fb5dad34680ac 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -464,7 +464,7 @@ def match(self, path_pattern, *, case_sensitive=None): return False return True - def globmatch(self, pattern, *, case_sensitive=None): + def full_match(self, pattern, *, case_sensitive=None): """ Return True if this path matches the given glob-style pattern. """ diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index d4987dfac122d6..dd46249b7ec826 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -261,71 +261,71 @@ def test_match_common(self): self.assertFalse(P('').match('**')) self.assertFalse(P('').match('**/*')) - def test_globmatch_common(self): + def test_full_match_common(self): P = self.cls # Simple relative pattern. - self.assertTrue(P('b.py').globmatch('b.py')) - self.assertFalse(P('a/b.py').globmatch('b.py')) - self.assertFalse(P('/a/b.py').globmatch('b.py')) - self.assertFalse(P('a.py').globmatch('b.py')) - self.assertFalse(P('b/py').globmatch('b.py')) - self.assertFalse(P('/a.py').globmatch('b.py')) - self.assertFalse(P('b.py/c').globmatch('b.py')) + self.assertTrue(P('b.py').full_match('b.py')) + self.assertFalse(P('a/b.py').full_match('b.py')) + self.assertFalse(P('/a/b.py').full_match('b.py')) + self.assertFalse(P('a.py').full_match('b.py')) + self.assertFalse(P('b/py').full_match('b.py')) + self.assertFalse(P('/a.py').full_match('b.py')) + self.assertFalse(P('b.py/c').full_match('b.py')) # Wildcard relative pattern. - self.assertTrue(P('b.py').globmatch('*.py')) - self.assertFalse(P('a/b.py').globmatch('*.py')) - self.assertFalse(P('/a/b.py').globmatch('*.py')) - self.assertFalse(P('b.pyc').globmatch('*.py')) - self.assertFalse(P('b./py').globmatch('*.py')) - self.assertFalse(P('b.py/c').globmatch('*.py')) + self.assertTrue(P('b.py').full_match('*.py')) + self.assertFalse(P('a/b.py').full_match('*.py')) + self.assertFalse(P('/a/b.py').full_match('*.py')) + self.assertFalse(P('b.pyc').full_match('*.py')) + self.assertFalse(P('b./py').full_match('*.py')) + self.assertFalse(P('b.py/c').full_match('*.py')) # Multi-part relative pattern. - self.assertTrue(P('ab/c.py').globmatch('a*/*.py')) - self.assertFalse(P('/d/ab/c.py').globmatch('a*/*.py')) - self.assertFalse(P('a.py').globmatch('a*/*.py')) - self.assertFalse(P('/dab/c.py').globmatch('a*/*.py')) - self.assertFalse(P('ab/c.py/d').globmatch('a*/*.py')) + self.assertTrue(P('ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('a.py').full_match('a*/*.py')) + self.assertFalse(P('/dab/c.py').full_match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').full_match('a*/*.py')) # Absolute pattern. - self.assertTrue(P('/b.py').globmatch('/*.py')) - self.assertFalse(P('b.py').globmatch('/*.py')) - self.assertFalse(P('a/b.py').globmatch('/*.py')) - self.assertFalse(P('/a/b.py').globmatch('/*.py')) + self.assertTrue(P('/b.py').full_match('/*.py')) + self.assertFalse(P('b.py').full_match('/*.py')) + self.assertFalse(P('a/b.py').full_match('/*.py')) + self.assertFalse(P('/a/b.py').full_match('/*.py')) # Multi-part absolute pattern. - self.assertTrue(P('/a/b.py').globmatch('/a/*.py')) - self.assertFalse(P('/ab.py').globmatch('/a/*.py')) - self.assertFalse(P('/a/b/c.py').globmatch('/a/*.py')) + self.assertTrue(P('/a/b.py').full_match('/a/*.py')) + self.assertFalse(P('/ab.py').full_match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').full_match('/a/*.py')) # Multi-part glob-style pattern. - self.assertTrue(P('a').globmatch('**')) - self.assertTrue(P('c.py').globmatch('**')) - self.assertTrue(P('a/b/c.py').globmatch('**')) - self.assertTrue(P('/a/b/c.py').globmatch('**')) - self.assertTrue(P('/a/b/c.py').globmatch('/**')) - self.assertTrue(P('/a/b/c.py').globmatch('/a/**')) - self.assertTrue(P('/a/b/c.py').globmatch('**/*.py')) - self.assertTrue(P('/a/b/c.py').globmatch('/**/*.py')) - self.assertTrue(P('/a/b/c.py').globmatch('/a/**/*.py')) - self.assertTrue(P('/a/b/c.py').globmatch('/a/b/**/*.py')) - self.assertTrue(P('/a/b/c.py').globmatch('/**/**/**/**/*.py')) - self.assertFalse(P('c.py').globmatch('**/a.py')) - self.assertFalse(P('c.py').globmatch('c/**')) - self.assertFalse(P('a/b/c.py').globmatch('**/a')) - self.assertFalse(P('a/b/c.py').globmatch('**/a/b')) - self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c')) - self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c.')) - self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').globmatch('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').globmatch('/a/b/c.py/**')) - self.assertFalse(P('a/b/c.py').globmatch('/**/a/b/c.py')) - self.assertRaises(ValueError, P('a').globmatch, '**a/b/c') - self.assertRaises(ValueError, P('a').globmatch, 'a/b/c**') + self.assertTrue(P('a').full_match('**')) + self.assertTrue(P('c.py').full_match('**')) + self.assertTrue(P('a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('/**')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**')) + self.assertTrue(P('/a/b/c.py').full_match('**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py')) + self.assertFalse(P('c.py').full_match('**/a.py')) + self.assertFalse(P('c.py').full_match('c/**')) + self.assertFalse(P('a/b/c.py').full_match('**/a')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**')) + self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py')) + self.assertRaises(ValueError, P('a').full_match, '**a/b/c') + self.assertRaises(ValueError, P('a').full_match, 'a/b/c**') # Case-sensitive flag - self.assertFalse(P('A.py').globmatch('a.PY', case_sensitive=True)) - self.assertTrue(P('A.py').globmatch('a.PY', case_sensitive=False)) - self.assertFalse(P('c:/a/B.Py').globmatch('C:/A/*.pY', case_sensitive=True)) - self.assertTrue(P('/a/b/c.py').globmatch('/A/*/*.Py', case_sensitive=False)) + self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False)) # Matching against empty path - self.assertFalse(P('').globmatch('*')) - self.assertTrue(P('').globmatch('**')) - self.assertFalse(P('').globmatch('**/*')) + self.assertFalse(P('').full_match('*')) + self.assertTrue(P('').full_match('**')) + self.assertFalse(P('').full_match('**/*')) def test_parts_common(self): # `parts` returns a tuple. From 5c2ea0e6fdbb704348dfaf94d604afff9527c161 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 22 Jan 2024 22:55:38 +0000 Subject: [PATCH 5/9] Improve docstrings. --- Lib/pathlib/_abc.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 8fb5dad34680ac..b6de95cffd47f3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -443,7 +443,10 @@ def _pattern_stack(self): def match(self, path_pattern, *, case_sensitive=None): """ - Return True if this path matches the given pattern. + Return True if this path matches the given pattern. If the pattern is + relative, matching is done from the right; otherwise, the entire path + is matched. The recursive wildcard '**' is *not* supported by this + method. """ if not isinstance(path_pattern, PurePathBase): path_pattern = self.with_segments(path_pattern) @@ -466,7 +469,8 @@ def match(self, path_pattern, *, case_sensitive=None): def full_match(self, pattern, *, case_sensitive=None): """ - Return True if this path matches the given glob-style pattern. + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. """ if not isinstance(pattern, PurePathBase): pattern = self.with_segments(pattern) From a5eaf810d7f9ecde5a04b54fc81a89fabfeca19a Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 23 Jan 2024 21:44:09 +0000 Subject: [PATCH 6/9] Note difference in empty pattern handling. --- Doc/library/pathlib.rst | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 1e8e6a93415477..27b128f3bb21d1 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -583,9 +583,10 @@ Pure paths provide the following methods and properties: Match this path against the provided non-recursive glob-style pattern. Return ``True`` if matching is successful, ``False`` otherwise. - This method is similar to :meth:`~PurePath.full_match`, but the recursive - wildcard "``**``" is not supported (it acts like non-recursive "``*``"), - and if a relative pattern is given, then matching is done from the right:: + This method is similar to :meth:`~PurePath.full_match`, but empty patterns + aren't allowed (:exc:`ValueError` is raised), the recursive wildcard + "``**``" isn't supported (it acts like non-recursive "``*``"), and if a + relative pattern is provided, then matching is done from the right:: >>> PurePath('a/b.py').match('*.py') True From 6fd7b016f14df4a86ab9766eb73ab382fd23581f Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 26 Jan 2024 00:04:28 +0000 Subject: [PATCH 7/9] Double compiled pattern cache size --- Lib/pathlib/_abc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index b6de95cffd47f3..dc3a8e0b911d20 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -47,7 +47,7 @@ def _is_case_sensitive(pathmod): re = glob = None -@functools.lru_cache(maxsize=256) +@functools.lru_cache(maxsize=512) def _compile_pattern(pat, sep, case_sensitive, recursive=True): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" From 2172b4de58031d20953622908cd9df2a964dc556 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 26 Jan 2024 00:24:00 +0000 Subject: [PATCH 8/9] Fix handling of empty paths and patterns. --- Lib/pathlib/__init__.py | 7 +++++++ Lib/pathlib/_abc.py | 16 +++++++++------- Lib/test/test_pathlib/test_pathlib_abc.py | 3 +++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index b043aed12b3849..eee82ef26bc7e7 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -490,6 +490,13 @@ def _pattern_stack(self): parts.reverse() return parts + @property + def _pattern_str(self): + """The path expressed as a string, for use in pattern-matching.""" + # The string representation of an empty path is a single dot ('.'). Empty + # paths shouldn't match wildcards, so we change it to the empty string. + path_str = str(self) + return '' if path_str == '.' else path_str # Subclassing os.PathLike makes isinstance() checks slower, # which in turn makes Path construction slower. Register instead! diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index dc3a8e0b911d20..74f7491c3d3cff 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -57,9 +57,6 @@ def _compile_pattern(pat, sep, case_sensitive, recursive=True): flags = re.NOFLAG if case_sensitive else re.IGNORECASE regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep) - # The string representation of an empty path is a single dot ('.'). Empty - # paths shouldn't match wildcards, so we consume it with an atomic group. - regex = r'(\.\Z)?+' + regex return re.compile(regex, flags=flags).match @@ -441,6 +438,11 @@ def _pattern_stack(self): raise NotImplementedError("Non-relative patterns are unsupported") return parts + @property + def _pattern_str(self): + """The path expressed as a string, for use in pattern-matching.""" + return str(self) + def match(self, path_pattern, *, case_sensitive=None): """ Return True if this path matches the given pattern. If the pattern is @@ -476,8 +478,8 @@ def full_match(self, pattern, *, case_sensitive=None): pattern = self.with_segments(pattern) if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) - match = _compile_pattern(str(pattern), pattern.pathmod.sep, case_sensitive) - return match(str(self)) is not None + match = _compile_pattern(pattern._pattern_str, pattern.pathmod.sep, case_sensitive) + return match(self._pattern_str) is not None @@ -801,8 +803,8 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): if filter_paths: # Filter out paths that don't match pattern. prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(str(pattern), sep, case_sensitive) - paths = (path for path in paths if match(str(path), prefix_len)) + match = _compile_pattern(pattern._pattern_str, sep, case_sensitive) + paths = (path for path in paths if match(path._pattern_str, prefix_len)) return paths def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index dd46249b7ec826..3f705722f3e82a 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -326,6 +326,9 @@ def test_full_match_common(self): self.assertFalse(P('').full_match('*')) self.assertTrue(P('').full_match('**')) self.assertFalse(P('').full_match('**/*')) + # Matching with empty pattern + self.assertTrue(P('').full_match('')) + self.assertTrue(P('.').full_match('.')) def test_parts_common(self): # `parts` returns a tuple. From f60f1474b9a50eeb64c90ec9a2c37feb9504fba5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 26 Jan 2024 00:48:07 +0000 Subject: [PATCH 9/9] Couple more test cases. --- Lib/test/test_pathlib/test_pathlib_abc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 3f705722f3e82a..364f776dbb1413 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -329,6 +329,10 @@ def test_full_match_common(self): # Matching with empty pattern self.assertTrue(P('').full_match('')) self.assertTrue(P('.').full_match('.')) + self.assertFalse(P('/').full_match('')) + self.assertFalse(P('/').full_match('.')) + self.assertFalse(P('foo').full_match('')) + self.assertFalse(P('foo').full_match('.')) def test_parts_common(self): # `parts` returns a tuple.