From a1326c8c5f6dfb2d402f9346583accfa12552989 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 24 Nov 2023 07:35:17 +0000 Subject: [PATCH 01/23] GH-65238: Preserve trailing slash in pathlib --- Lib/pathlib.py | 93 ++++++++++++++++++++++++++-------------- Lib/test/test_pathlib.py | 90 +++++++++++++++++++------------------- 2 files changed, 108 insertions(+), 75 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 9bce5320ef68e9..434715b47d4c82 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -88,6 +88,12 @@ def _compile_pattern(pat, sep, case_sensitive): return re.compile(regex, flags).match +def _select_parents(paths, dir_only): + """Yield lexical '..' children of the given paths.""" + for path in paths: + yield path._make_child_relpath('..', dir_only) + + def _select_children(parent_paths, dir_only, follow_symlinks, match): """Yield direct children of given paths, filtering by name and type.""" if follow_symlinks is None: @@ -110,7 +116,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match): continue name = entry.name if match(name): - yield parent_path._make_child_relpath(name) + yield parent_path._make_child_relpath(name, dir_only) def _select_recursive(parent_paths, dir_only, follow_symlinks): @@ -133,7 +139,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks): for entry in entries: try: if entry.is_dir(follow_symlinks=follow_symlinks): - paths.append(path._make_child_relpath(entry.name)) + paths.append(path._make_child_relpath(entry.name, dir_only)) continue except OSError: pass @@ -271,6 +277,9 @@ def _parse_path(cls, path): # e.g. 
//?/unc/server/share root = sep parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] + if parsed and not rel.endswith(parsed[-1]): + # Preserve trailing slash + parsed.append('') return drv, root, parsed def _load_parts(self): @@ -359,7 +368,7 @@ def name(self): tail = self._tail if not tail: return '' - return tail[-1] + return tail[-1] or tail[-2] @property def suffix(self): @@ -398,15 +407,38 @@ def stem(self): else: return name + @property + def has_trailing_sep(self): + tail = self._tail + return tail and not tail[-1] + + def without_trailing_sep(self): + tail = self._tail + if tail and not tail[-1]: + return self._from_parsed_parts(self.drive, self.root, tail[:-1]) + else: + return self + + def with_trailing_sep(self): + tail = self._tail + if not tail: + raise ValueError('empty name') + elif tail[-1]: + return self._from_parsed_parts(self.drive, self.root, tail + ['']) + else: + return self + def with_name(self, name): """Return a new path with the file name changed.""" - if not self.name: - raise ValueError("%r has an empty name" % (self,)) m = self.pathmod if not name or m.sep in name or (m.altsep and m.altsep in name) or name == '.': raise ValueError("Invalid name %r" % (name)) - return self._from_parsed_parts(self.drive, self.root, - self._tail[:-1] + [name]) + tail = list(self._tail) + if not tail: + raise ValueError("%r has an empty name" % (self,)) + idx = -1 if tail[-1] else -2 + tail[idx] = name + return self._from_parsed_parts(self.drive, self.root, tail) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -417,21 +449,9 @@ def with_suffix(self, suffix): has no suffix, add given suffix. If the given suffix is an empty string, remove the suffix from the path. 
""" - m = self.pathmod - if m.sep in suffix or m.altsep and m.altsep in suffix: - raise ValueError("Invalid suffix %r" % (suffix,)) if suffix and not suffix.startswith('.') or suffix == '.': raise ValueError("Invalid suffix %r" % (suffix)) - name = self.name - if not name: - raise ValueError("%r has an empty name" % (self,)) - old_suffix = self.suffix - if not old_suffix: - name = name + suffix - else: - name = name[:-len(old_suffix)] + suffix - return self._from_parsed_parts(self.drive, self.root, - self._tail[:-1] + [name]) + return self.with_name(self.stem + suffix) def relative_to(self, other, /, *_deprecated, walk_up=False): """Return the relative path to another path identified by the passed @@ -450,6 +470,7 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): other = self.with_segments(other, *_deprecated) elif not isinstance(other, PurePath): other = self.with_segments(other) + other = other.without_trailing_sep() for step, path in enumerate([other] + list(other.parents)): if path == self or path in self.parents: break @@ -474,6 +495,7 @@ def is_relative_to(self, other, /, *_deprecated): other = self.with_segments(other, *_deprecated) elif not isinstance(other, PurePath): other = self.with_segments(other) + other = other.without_trailing_sep() return other == self or other in self.parents @property @@ -513,7 +535,8 @@ def parent(self): tail = self._tail if not tail: return self - path = self._from_parsed_parts(drv, root, tail[:-1]) + idx = -1 if tail[-1] else -2 + path = self._from_parsed_parts(drv, root, tail[:idx]) path._resolving = self._resolving return path @@ -522,7 +545,7 @@ def parents(self): """A sequence of this path's logical parents.""" # The value of this property should not be cached on the path object, # as doing so would introduce a reference cycle. 
- return _PathParents(self) + return _PathParents(self.without_trailing_sep()) def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, @@ -991,20 +1014,28 @@ def _scandir(self): # context manager. This method is called by walk() and glob(). return contextlib.nullcontext(self.iterdir()) - def _make_child_relpath(self, name): + def _make_child_relpath(self, name, trailing_slash=False): path_str = str(self) - tail = self._tail + tail = list(self._tail) if tail: - path_str = f'{path_str}{self.pathmod.sep}{name}' + if tail[-1]: + path_str = f'{path_str}{self.pathmod.sep}{name}' + else: + path_str = f'{path_str}{name}' + tail.pop(-1) elif path_str != '.': path_str = f'{path_str}{name}' else: path_str = name + tail.append(name) + if trailing_slash: + path_str = f'{path_str}{self.pathmod.sep}' + tail.append('') path = self.with_segments(path_str) path._str = path_str path._drv = self.drive path._root = self.root - path._tail_cached = tail + [name] + path._tail_cached = tail return path def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): @@ -1030,9 +1061,6 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): raise ValueError("Unacceptable pattern: {!r}".format(pattern)) pattern_parts = list(path_pattern._tail) - if pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): - # GH-65238: pathlib doesn't preserve trailing slash. Add it back. - pattern_parts.append('') if pattern_parts[-1] == '**': # GH-70303: '**' only matches directories. Add trailing slash. warnings.warn( @@ -1056,7 +1084,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): filter_paths = follow_symlinks is not None and '..' 
not in pattern_parts deduplicate_paths = False sep = self.pathmod.sep - paths = iter([self] if self.is_dir() else []) + paths = iter([self.with_trailing_sep()] if self.is_dir() else []) part_idx = 0 while part_idx < len(pattern_parts): part = pattern_parts[part_idx] @@ -1065,7 +1093,8 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): # Trailing slash. pass elif part == '..': - paths = (path._make_child_relpath('..') for path in paths) + dir_only = part_idx < len(pattern_parts) + paths = _select_parents(paths, dir_only) elif part == '**': # Consume adjacent '**' components. while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**': @@ -1214,6 +1243,8 @@ def resolve(self, strict=False): # Delete '..' segment and its predecessor path = path.parent continue + elif not part: + continue next_path = path._make_child_relpath(part) if querying and part != '..': next_path._resolving = True diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index e1121a9d76c040..20792b7e40823f 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -53,11 +53,14 @@ class PurePathTest(unittest.TestCase): # supposed to produce equal paths. equivalences = { 'a/b': [ - ('a', 'b'), ('a/', 'b'), ('a', 'b/'), ('a/', 'b/'), - ('a/b/',), ('a//b',), ('a//b//',), + ('a', 'b'), ('a/', 'b'), ('a//b',), # Empty components get removed. - ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), + ('', 'a', 'b'), ('a', '', 'b'), ], + 'a/b/': [ + ('a', 'b/'), ('a/', 'b/'), + ('a/b/',), ('a//b//',), ('a', 'b', ''), + ], '/b/c/d': [ ('a', '/b/c', 'd'), ('/a', '/b/c', 'd'), # Empty components get removed. @@ -175,11 +178,11 @@ def test_drive_root_parts_common(self): # Unanchored parts. check((), '', '', ()) check(('a',), '', '', ('a',)) - check(('a/',), '', '', ('a',)) + check(('a/',), '', '', ('a', '')) check(('a', 'b'), '', '', ('a', 'b')) # Expansion. 
check(('a/b',), '', '', ('a', 'b')) - check(('a/b/',), '', '', ('a', 'b')) + check(('a/b/',), '', '', ('a', 'b', '')) check(('a', 'b/c', 'd'), '', '', ('a', 'b', 'c', 'd')) # Collapsing and stripping excess slashes. check(('a', 'b//c', 'd'), '', '', ('a', 'b', 'c', 'd')) @@ -188,7 +191,7 @@ def test_drive_root_parts_common(self): check(('.',), '', '', ()) check(('.', '.', 'b'), '', '', ('b',)) check(('a', '.', 'b'), '', '', ('a', 'b')) - check(('a', '.', '.'), '', '', ('a',)) + check(('a', '.', '.'), '', '', ('a', '')) # The first part is anchored. check(('/a/b',), '', sep, (sep, 'a', 'b')) check(('/a', 'b'), '', sep, (sep, 'a', 'b')) @@ -311,7 +314,7 @@ def test_match_common(self): self.assertTrue(P('a/b/c.py').match('**')) self.assertTrue(P('/a/b/c.py').match('**')) self.assertTrue(P('/a/b/c.py').match('/**')) - self.assertTrue(P('/a/b/c.py').match('**/')) + #self.assertTrue(P('/a/b/c.py').match('**/')) self.assertTrue(P('/a/b/c.py').match('/a/**')) self.assertTrue(P('/a/b/c.py').match('**/*.py')) self.assertTrue(P('/a/b/c.py').match('/**/*.py')) @@ -575,8 +578,6 @@ def test_with_suffix_common(self): self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') self.assertRaises(ValueError, P('a/b').with_suffix, './.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') - self.assertRaises(ValueError, P('a/b').with_suffix, - (self.pathmod.sep, 'd')) def test_relative_to_common(self): P = self.cls @@ -888,7 +889,8 @@ class PureWindowsPathTest(PurePathTest): equivalences = PurePathTest.equivalences.copy() equivalences.update({ './a:b': [ ('./a:b',) ], - 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ], + 'c:a': [ ('c:', 'a'), ('.', 'c:', 'a') ], + 'c:a/': [('c:', 'a/'), ('c:', 'a', '.')], 'c:/a': [ ('c:/', 'a'), ('c:', '/', 'a'), ('c:', '/a'), ('/z', 'c:/', 'a'), ('//x/y', 'c:/', 'a'), @@ -921,7 +923,7 @@ def test_drive_root_parts(self): # UNC paths. 
check(('a', '//b/c', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) # Collapsing and stripping excess slashes. - check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd')) + check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd', '')) # UNC paths. check(('a', '//b/c//', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) # Extended paths. @@ -1970,9 +1972,9 @@ def _check(glob, expected): _check(p.glob("brokenLink"), ['brokenLink']) if not self.can_symlink: - _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE"]) + _check(p.glob("*/"), ["dirA/", "dirB/", "dirC/", "dirE/"]) else: - _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"]) + _check(p.glob("*/"), ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) def test_glob_empty_pattern(self): p = self.cls() @@ -2005,17 +2007,17 @@ def _check(path, glob, expected): _check(p, "*A", ["dirA", "fileA", "linkA"]) _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/.."]) - _check(p, "dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE"]) + _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"]) + _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) - _check(p, "*/dirD/**/", ["dirC/dirD"]) + _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + 
_check(p, "*/dirD/**/", ["dirC/dirD/"]) def test_glob_no_follow_symlinks_common(self): if not self.can_symlink: @@ -2030,15 +2032,15 @@ def _check(path, glob, expected): _check(p, "*A", ["dirA", "fileA", "linkA"]) _check(p, "*B/*", ["dirB/fileB", "dirB/linkD"]) _check(p, "*/fileB", ["dirB/fileB"]) - _check(p, "*/", ["dirA", "dirB", "dirC", "dirE"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/"]) _check(p, "dir*/*/..", ["dirC/dirD/.."]) - _check(p, "dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"]) + _check(p, "dir*/**/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) _check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**/", ["dirC/dirD"]) + _check(p, "dir*/*/**/", ["dirC/dirD/"]) _check(p, "dir*/*/**/..", ["dirC/dirD/.."]) _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"]) - _check(p, "*/dirD/**/", ["dirC/dirD"]) + _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**/", ["dirC/dirD/"]) def test_rglob_common(self): def _check(glob, expected): @@ -2060,25 +2062,25 @@ def _check(glob, expected): "dirC/fileC", "dirC/dirD/fileD"]) if not self.can_symlink: _check(p.rglob("*/"), [ - "dirA", "dirB", "dirC", "dirC/dirD", "dirE", + "dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/", ]) else: _check(p.rglob("*/"), [ - "dirA", "dirA/linkC", "dirB", "dirB/linkD", "dirC", - "dirC/dirD", "dirE", "linkB", + "dirA/", "dirA/linkC/", "dirB/", "dirB/linkD/", "dirC/", + "dirC/dirD/", "dirE/", "linkB/", ]) - _check(p.rglob(""), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + _check(p.rglob(""), ["./", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) p = P(BASE, "dirC") _check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt", "dirC/dirD", "dirC/dirD/fileD"]) _check(p.rglob("file*"), ["dirC/fileC", "dirC/dirD/fileD"]) _check(p.rglob("**/file*"), ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p.rglob("dir*/**/"), ["dirC/dirD"]) + 
_check(p.rglob("dir*/**/"), ["dirC/dirD/"]) _check(p.rglob("*/*"), ["dirC/dirD/fileD"]) - _check(p.rglob("*/"), ["dirC/dirD"]) - _check(p.rglob(""), ["dirC", "dirC/dirD"]) - _check(p.rglob("**/"), ["dirC", "dirC/dirD"]) + _check(p.rglob("*/"), ["dirC/dirD/"]) + _check(p.rglob(""), ["dirC/", "dirC/dirD/"]) + _check(p.rglob("**/"), ["dirC/", "dirC/dirD/"]) # gh-91616, a re module regression _check(p.rglob("*.txt"), ["dirC/novel.txt"]) _check(p.rglob("*.*"), ["dirC/novel.txt"]) @@ -2097,18 +2099,18 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"]) _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) - _check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"]) - _check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD", - "dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"]) + _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) + _check(p, "", ["./", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) p = P(BASE, "dirC") _check(p, "*", ["dirC/fileC", "dirC/novel.txt", "dirC/dirD", "dirC/dirD/fileD"]) _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD"]) - _check(p, "", ["dirC", "dirC/dirD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) # gh-91616, a re module regression _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) @@ -2125,16 +2127,16 @@ def _check(path, glob, expected): _check(p, "*/fileA", []) _check(p, "*/fileB", ["dirB/fileB"]) _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ]) - _check(p, "*/", ["dirA", "dirB", "dirC", 
"dirC/dirD", "dirE"]) - _check(p, "", ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) + _check(p, "", ["./", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) p = P(BASE, "dirC") _check(p, "*", ["dirC/fileC", "dirC/novel.txt", "dirC/dirD", "dirC/dirD/fileD"]) _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD"]) - _check(p, "", ["dirC", "dirC/dirD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) # gh-91616, a re module regression _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) @@ -3644,7 +3646,7 @@ def test_glob(self): P = self.cls p = P(BASE) self.assertEqual(set(p.glob("FILEa")), { P(BASE, "fileA") }) - self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA") }) + self.assertEqual(set(p.glob("*a\\")), { P(BASE, "dirA/") }) self.assertEqual(set(p.glob("F*a")), { P(BASE, "fileA") }) self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) @@ -3653,7 +3655,7 @@ def test_rglob(self): P = self.cls p = P(BASE, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(BASE, "dirC/dirD/fileD") }) - self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD") }) + self.assertEqual(set(p.rglob("*\\")), { P(BASE, "dirC/dirD/") }) self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) def test_expanduser(self): From df1e1158392edf56b35a40c77c0819042be772b7 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 24 Nov 2023 10:00:50 +0000 Subject: [PATCH 02/23] Fix absolute() on bare DOS drive --- Lib/pathlib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 434715b47d4c82..9b129e2a3b3196 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1452,6 +1452,8 @@ def absolute(self): elif self.drive: # There is a CWD on each drive-letter drive. 
cwd = os.path.abspath(self.drive) + if not self._tail: + return self.with_segments(cwd) else: cwd = os.getcwd() # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). From 61be2e385694fece50ccb723c1ecf0fbc613d60a Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 24 Nov 2023 10:12:59 +0000 Subject: [PATCH 03/23] Simplify diff --- Lib/test/test_pathlib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 20792b7e40823f..af10b35d9711ea 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -2069,7 +2069,7 @@ def _check(glob, expected): "dirA/", "dirA/linkC/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/", "linkB/", ]) - _check(p.rglob(""), ["./", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) + _check(p.rglob(""), ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) p = P(BASE, "dirC") _check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt", @@ -2101,7 +2101,7 @@ def _check(path, glob, expected): "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) - _check(p, "", ["./", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) p = P(BASE, "dirC") @@ -2128,7 +2128,7 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB"]) _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ]) _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "", ["./", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) + _check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) p = P(BASE, "dirC") _check(p, "*", ["dirC/fileC", "dirC/novel.txt", From 445364a1262da2995803d3c58ad4677f80048ed6 Mon Sep 17 
00:00:00 2001 From: barneygale Date: Fri, 24 Nov 2023 10:38:43 +0000 Subject: [PATCH 04/23] Reduce diff --- Lib/pathlib.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 9b129e2a3b3196..9cdfbe965efbdb 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -88,12 +88,6 @@ def _compile_pattern(pat, sep, case_sensitive): return re.compile(regex, flags).match -def _select_parents(paths, dir_only): - """Yield lexical '..' children of the given paths.""" - for path in paths: - yield path._make_child_relpath('..', dir_only) - - def _select_children(parent_paths, dir_only, follow_symlinks, match): """Yield direct children of given paths, filtering by name and type.""" if follow_symlinks is None: @@ -1094,7 +1088,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): pass elif part == '..': dir_only = part_idx < len(pattern_parts) - paths = _select_parents(paths, dir_only) + paths = (path._make_child_relpath('..', dir_only) for path in paths) elif part == '**': # Consume adjacent '**' components. 
while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**': From 63958156efd4c27b13086470d65d016ea431d3f2 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 24 Nov 2023 12:04:05 +0000 Subject: [PATCH 05/23] Speed up _make_child_relpath() --- Lib/pathlib.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 9cdfbe965efbdb..b0e6739d3715ce 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1009,26 +1009,28 @@ def _scandir(self): return contextlib.nullcontext(self.iterdir()) def _make_child_relpath(self, name, trailing_slash=False): - path_str = str(self) - tail = list(self._tail) + drive = self.drive + root = self.root + tail = self._tail.copy() + parts = [] if tail: + parts.append(str(self)) if tail[-1]: - path_str = f'{path_str}{self.pathmod.sep}{name}' + parts.append(self.pathmod.sep) else: - path_str = f'{path_str}{name}' tail.pop(-1) - elif path_str != '.': - path_str = f'{path_str}{name}' - else: - path_str = name + elif root or drive: + parts.append(str(self)) + parts.append(name) tail.append(name) if trailing_slash: - path_str = f'{path_str}{self.pathmod.sep}' + parts.append(self.pathmod.sep) tail.append('') + path_str = ''.join(parts) path = self.with_segments(path_str) path._str = path_str - path._drv = self.drive - path._root = self.root + path._drv = drive + path._root = root path._tail_cached = tail return path From 435be1b5823a50b3c1627e7238cb92164cbddd99 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 25 Nov 2023 11:59:57 +0000 Subject: [PATCH 06/23] Ignore empty initialiser arguments for backwards compat --- Lib/pathlib.py | 3 ++- Lib/test/test_pathlib.py | 11 +++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b0e6739d3715ce..7e2a8b30e4eb10 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -620,7 +620,8 @@ def __init__(self, *args): "argument should be a str or an os.PathLike " "object 
where __fspath__ returns a str, " f"not {type(path).__name__!r}") - paths.append(path) + if path: + paths.append(path) self._raw_paths = paths self._resolving = False diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index af10b35d9711ea..06604cdbc5773a 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -55,11 +55,10 @@ class PurePathTest(unittest.TestCase): 'a/b': [ ('a', 'b'), ('a/', 'b'), ('a//b',), # Empty components get removed. - ('', 'a', 'b'), ('a', '', 'b'), + ('', 'a', 'b'), ('a', '', 'b'), ('a', 'b', ''), ], 'a/b/': [ - ('a', 'b/'), ('a/', 'b/'), - ('a/b/',), ('a//b//',), ('a', 'b', ''), + ('a', 'b/'), ('a/', 'b/'), ('a/b/',), ('a//b//',), ], '/b/c/d': [ ('a', '/b/c', 'd'), ('/a', '/b/c', 'd'), @@ -2069,7 +2068,7 @@ def _check(glob, expected): "dirA/", "dirA/linkC/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/", "linkB/", ]) - _check(p.rglob(""), ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) + _check(p.rglob(""), ["./", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) p = P(BASE, "dirC") _check(p.rglob("*"), ["dirC/fileC", "dirC/novel.txt", @@ -2101,7 +2100,7 @@ def _check(path, glob, expected): "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) - _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + _check(p, "", ["./", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) p = P(BASE, "dirC") @@ -2128,7 +2127,7 @@ def _check(path, glob, expected): _check(p, "*/fileB", ["dirB/fileB"]) _check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ]) _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "", ["", "dirA/", "dirB/", "dirC/", "dirE/", "dirC/dirD/"]) + _check(p, "", ["./", "dirA/", "dirB/", "dirC/", 
"dirE/", "dirC/dirD/"]) p = P(BASE, "dirC") _check(p, "*", ["dirC/fileC", "dirC/novel.txt", From d766d068d67171522d933343993a1307f25d4c97 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 1 Dec 2023 21:41:26 +0000 Subject: [PATCH 07/23] Simplify implementation --- Lib/pathlib.py | 146 ++++++++++++++++++++++++------------------------- 1 file changed, 73 insertions(+), 73 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index d0b4e69e584b1a..b4cde2c881ecc4 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -216,7 +216,7 @@ class PurePath: # `os.path.splitroot()`, except that the tail is further split on path # separators (i.e. it is a list of strings), and that the root and # tail are normalized. - '_drv', '_root', '_tail_cached', + '_drv', '_root', '_tail_cached', '_has_trailing_sep', # The `_str` slot stores the string representation of the path, # computed from the drive, root and tail when `__str__()` is called @@ -256,7 +256,7 @@ def with_segments(self, *pathsegments): @classmethod def _parse_path(cls, path): if not path: - return '', '', [] + return '', '', [], False sep = cls.pathmod.sep altsep = cls.pathmod.altsep if altsep: @@ -271,10 +271,8 @@ def _parse_path(cls, path): # e.g. 
//?/unc/server/share root = sep parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] - if parsed and not rel.endswith(parsed[-1]): - # Preserve trailing slash - parsed.append('') - return drv, root, parsed + has_trailing_sep = parsed and not rel.endswith(parsed[-1]) + return drv, root, parsed, has_trailing_sep def _load_parts(self): paths = self._raw_paths @@ -284,22 +282,26 @@ def _load_parts(self): path = paths[0] else: path = self.pathmod.join(*paths) - drv, root, tail = self._parse_path(path) + drv, root, tail, has_trailing_sep = self._parse_path(path) self._drv = drv self._root = root self._tail_cached = tail + self._has_trailing_sep = has_trailing_sep - def _from_parsed_parts(self, drv, root, tail): - path_str = self._format_parsed_parts(drv, root, tail) + def _from_parsed_parts(self, drv, root, tail, has_trailing_sep=False): + path_str = self._format_parsed_parts(drv, root, tail, has_trailing_sep) path = self.with_segments(path_str) path._str = path_str or '.' path._drv = drv path._root = root path._tail_cached = tail + path._has_trailing_sep = has_trailing_sep return path @classmethod - def _format_parsed_parts(cls, drv, root, tail): + def _format_parsed_parts(cls, drv, root, tail, has_trailing_sep): + if has_trailing_sep: + tail = tail + [''] if drv or root: return drv + root + cls.pathmod.sep.join(tail) elif tail and cls.pathmod.splitdrive(tail[0])[0]: @@ -313,7 +315,7 @@ def __str__(self): return self._str except AttributeError: self._str = self._format_parsed_parts(self.drive, self.root, - self._tail) or '.' + self._tail, self.has_trailing_sep) or '.' 
return self._str def as_posix(self): @@ -350,6 +352,14 @@ def _tail(self): self._load_parts() return self._tail_cached + @property + def has_trailing_sep(self): + try: + return self._has_trailing_sep + except AttributeError: + self._load_parts() + return self._has_trailing_sep + @property def anchor(self): """The concatenation of the drive and root, or ''.""" @@ -362,7 +372,7 @@ def name(self): tail = self._tail if not tail: return '' - return tail[-1] or tail[-2] + return tail[-1] @property def suffix(self): @@ -401,27 +411,6 @@ def stem(self): else: return name - @property - def has_trailing_sep(self): - tail = self._tail - return tail and not tail[-1] - - def without_trailing_sep(self): - tail = self._tail - if tail and not tail[-1]: - return self._from_parsed_parts(self.drive, self.root, tail[:-1]) - else: - return self - - def with_trailing_sep(self): - tail = self._tail - if not tail: - raise ValueError('empty name') - elif tail[-1]: - return self._from_parsed_parts(self.drive, self.root, tail + ['']) - else: - return self - def with_name(self, name): """Return a new path with the file name changed.""" m = self.pathmod @@ -430,9 +419,8 @@ def with_name(self, name): tail = self._tail.copy() if not tail: raise ValueError(f"{self!r} has an empty name") - idx = -1 if tail[-1] else -2 - tail[idx] = name - return self._from_parsed_parts(self.drive, self.root, tail) + tail[-1] = name + return self._from_parsed_parts(self.drive, self.root, tail, self.has_trailing_sep) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -450,6 +438,19 @@ def with_suffix(self, suffix): else: raise ValueError(f"Invalid suffix {suffix!r}") + def without_trailing_sep(self): + if not self.has_trailing_sep: + return self + return self._from_parsed_parts(self.drive, self.root, self._tail, False) + + def with_trailing_sep(self): + if self.has_trailing_sep: + return self + tail = self._tail + if not tail: + raise ValueError('empty name') + return 
self._from_parsed_parts(self.drive, self.root, tail, True) + def relative_to(self, other, /, *_deprecated, walk_up=False): """Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not @@ -478,7 +479,8 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): else: raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors") parts = ['..'] * step + self._tail[len(path._tail):] - return self._from_parsed_parts('', '', parts) + has_trailing_sep = self.has_trailing_sep and bool(parts) + return self._from_parsed_parts('', '', parts, has_trailing_sep) def is_relative_to(self, other, /, *_deprecated): """Return True if the path is relative to another path or False. @@ -499,10 +501,12 @@ def is_relative_to(self, other, /, *_deprecated): def parts(self): """An object providing sequence-like access to the components in the filesystem path.""" + result = tuple(self._tail) if self.drive or self.root: - return (self.drive + self.root,) + tuple(self._tail) - else: - return tuple(self._tail) + result = (self.drive + self.root,) + result + if self.has_trailing_sep: + result = result + ('',) + return result def joinpath(self, *pathsegments): """Combine this path with one or several arguments, and return a @@ -532,8 +536,7 @@ def parent(self): tail = self._tail if not tail: return self - idx = -1 if tail[-1] else -2 - path = self._from_parsed_parts(drv, root, tail[:idx]) + path = self._from_parsed_parts(drv, root, tail[:-1]) path._resolving = self._resolving return path @@ -542,7 +545,7 @@ def parents(self): """A sequence of this path's logical parents.""" # The value of this property should not be cached on the path object, # as doing so would introduce a reference cycle. 
- return _PathParents(self.without_trailing_sep()) + return _PathParents(self) def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, @@ -1012,30 +1015,24 @@ def _scandir(self): # context manager. This method is called by walk() and glob(). return contextlib.nullcontext(self.iterdir()) - def _make_child_relpath(self, name, trailing_slash=False): - drive = self.drive - root = self.root - tail = self._tail.copy() - parts = [] - if tail: - parts.append(str(self)) - if tail[-1]: - parts.append(self.pathmod.sep) - else: - tail.pop(-1) - elif root or drive: - parts.append(str(self)) - parts.append(name) - tail.append(name) - if trailing_slash: - parts.append(self.pathmod.sep) - tail.append('') - path_str = ''.join(parts) + def _make_child_relpath(self, name, has_trailing_sep=False): + path_str = str(self) + tail = self._tail + sep = self.pathmod.sep + if tail and not self.has_trailing_sep: + path_str = f'{path_str}{sep}{name}' + elif path_str != '.': + path_str = f'{path_str}{name}' + else: + path_str = name + if has_trailing_sep: + path_str = f'{path_str}{sep}' path = self.with_segments(path_str) path._str = path_str - path._drv = drive - path._root = root - path._tail_cached = tail + path._drv = self.drive + path._root = self.root + path._tail_cached = tail + [name] + path._has_trailing_sep = has_trailing_sep return path def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): @@ -1060,7 +1057,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): elif not path_pattern._tail: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - pattern_parts = list(path_pattern._tail) + pattern_parts = list(path_pattern.parts) if pattern_parts[-1] == '**': # GH-70303: '**' only matches directories. Add trailing slash. warnings.warn( @@ -1084,7 +1081,12 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): filter_paths = follow_symlinks is not None and '..' 
not in pattern_parts deduplicate_paths = False sep = self.pathmod.sep - paths = iter([self.with_trailing_sep()] if self.is_dir() else []) + if not self.is_dir(): + paths = iter([]) + elif not self._tail: + paths = iter([self]) + else: + paths = iter([self.with_trailing_sep()]) part_idx = 0 while part_idx < len(pattern_parts): part = pattern_parts[part_idx] @@ -1243,8 +1245,6 @@ def resolve(self, strict=False): # Delete '..' segment and its predecessor path = path.parent continue - elif not part: - continue next_path = path._make_child_relpath(part) if querying and part != '..': next_path._resolving = True @@ -1452,8 +1452,6 @@ def absolute(self): elif self.drive: # There is a CWD on each drive-letter drive. cwd = os.path.abspath(self.drive) - if not self._tail: - return self.with_segments(cwd) else: cwd = os.getcwd() # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). @@ -1610,8 +1608,10 @@ def expanduser(self): homedir = os.path.expanduser(self._tail[0]) if homedir[:1] == "~": raise RuntimeError("Could not determine home directory.") - drv, root, tail = self._parse_path(homedir) - return self._from_parsed_parts(drv, root, tail + self._tail[1:]) + drv, root, tail, _ = self._parse_path(homedir) + tail.extend(self._tail[1:]) + has_trailing_sep = self.has_trailing_sep and bool(tail) + return self._from_parsed_parts(drv, root, tail, has_trailing_sep) return self From b27507899b622e6884f5b1d6abef7b3c83fa8e2a Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 3 Dec 2023 20:43:22 +0000 Subject: [PATCH 08/23] Add tests --- Lib/pathlib.py | 13 +-- Lib/test/test_pathlib.py | 173 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 174 insertions(+), 12 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index c764f39bdac5d6..e64191a0af776d 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -472,9 +472,8 @@ def relative_to(self, other, /, *_deprecated, walk_up=False): other = self.with_segments(other, *_deprecated) elif not isinstance(other, 
PurePath): other = self.with_segments(other) - other = other.without_trailing_sep() - for step, path in enumerate(chain([other], other.parents)): - if path == self or path in self.parents: + for step, path in enumerate(other._ancestors): + if path in self._ancestors: break elif not walk_up: raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}") @@ -498,8 +497,7 @@ def is_relative_to(self, other, /, *_deprecated): other = self.with_segments(other, *_deprecated) elif not isinstance(other, PurePath): other = self.with_segments(other) - other = other.without_trailing_sep() - return other == self or other in self.parents + return other.without_trailing_sep() in self._ancestors @property def parts(self): @@ -551,6 +549,11 @@ def parents(self): # as doing so would introduce a reference cycle. return _PathParents(self) + @property + def _ancestors(self): + yield self.without_trailing_sep() + yield from _PathParents(self) + def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 06604cdbc5773a..1d337f7abea6b2 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -236,7 +236,7 @@ def _check_str(self, expected, args): def test_str_common(self): # Canonicalized paths roundtrip. - for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + for pathstr in ('a', 'a/', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c', '/a/b/c/'): self._check_str(pathstr, (pathstr,)) # Special case for the empty path. self._check_str('.', ('',)) @@ -244,12 +244,12 @@ def test_str_common(self): def test_as_posix_common(self): P = self.cls - for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + for pathstr in ('a', 'a/', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c', '/a/b/c/'): self.assertEqual(P(pathstr).as_posix(), pathstr) # Other tests for as_posix() are in test_equivalences(). 
def test_repr_common(self): - for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + for pathstr in ('a', 'a/', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c', '/a/b/c/'): with self.subTest(pathstr=pathstr): p = self.cls(pathstr) clsname = p.__class__.__name__ @@ -265,9 +265,12 @@ def test_eq_common(self): self.assertEqual(P('a/b'), P('a/b')) self.assertEqual(P('a/b'), P('a', 'b')) self.assertNotEqual(P('a/b'), P('a')) + self.assertNotEqual(P('a/b'), P('a/')) + self.assertNotEqual(P('a/b'), P('a/b/')) self.assertNotEqual(P('a/b'), P('/a/b')) self.assertNotEqual(P('a/b'), P()) self.assertNotEqual(P('/a/b'), P('/')) + self.assertNotEqual(P('/a/b'), P('/a/b/')) self.assertNotEqual(P(), P('/')) self.assertNotEqual(P(), "") self.assertNotEqual(P(), {}) @@ -313,7 +316,6 @@ def test_match_common(self): self.assertTrue(P('a/b/c.py').match('**')) self.assertTrue(P('/a/b/c.py').match('**')) self.assertTrue(P('/a/b/c.py').match('/**')) - #self.assertTrue(P('/a/b/c.py').match('**/')) self.assertTrue(P('/a/b/c.py').match('/a/**')) self.assertTrue(P('/a/b/c.py').match('**/*.py')) self.assertTrue(P('/a/b/c.py').match('/**/*.py')) @@ -341,6 +343,33 @@ def test_match_common(self): self.assertFalse(P().match('*')) self.assertTrue(P().match('**')) self.assertFalse(P().match('**/*')) + # Matching empty path with ** and **/ + self.assertTrue(P().match('**')) + self.assertTrue(P().match('**/')) + self.assertTrue(P('').match('**')) + self.assertTrue(P('').match('**/')) + self.assertTrue(P('.').match('**')) + self.assertTrue(P('.').match('**/')) + self.assertTrue(P('', '').match('**')) + self.assertTrue(P('', '').match('**/')) + self.assertTrue(P('.', '').match('**')) + self.assertTrue(P('.', '').match('**/')) + self.assertTrue(P('', '.').match('**')) + self.assertTrue(P('', '.').match('**/')) + self.assertTrue(P('.', '.').match('**')) + self.assertTrue(P('.', '.').match('**/')) + # Matching single segment with/without trailing slash + self.assertTrue(P('foo').match('*')) + 
self.assertFalse(P('foo').match('*/')) + self.assertFalse(P('foo/').match('*')) + self.assertTrue(P('foo/').match('*/')) + # Matching any segments with trailing slash + self.assertFalse(P('/foo').match('**/')) + self.assertTrue(P('/foo/').match('**/')) + self.assertFalse(P('foo').match('**/')) + self.assertTrue(P('foo/').match('**/')) + self.assertFalse(P('foo/bar').match('**/')) + self.assertTrue(P('foo/bar/').match('**/')) def test_parts_common(self): # `parts` returns a tuple. @@ -353,6 +382,10 @@ def test_parts_common(self): p = P('/a/b') parts = p.parts self.assertEqual(parts, (sep, 'a', 'b')) + # When the path has a trailing separator, an additional empty part is present. + p = P('a/b/') + parts = p.parts + self.assertEqual(parts, ('a', 'b', '')) def test_equivalences(self): for k, tuples in self.equivalences.items(): @@ -386,6 +419,12 @@ def test_parent_common(self): self.assertEqual(p.parent.parent, P('/a')) self.assertEqual(p.parent.parent.parent, P('/')) self.assertEqual(p.parent.parent.parent.parent, P('/')) + # Trailing sep + p = P('/a/b/c/') + self.assertEqual(p.parent, P('/a/b')) + self.assertEqual(p.parent.parent, P('/a')) + self.assertEqual(p.parent.parent.parent, P('/')) + self.assertEqual(p.parent.parent.parent.parent, P('/')) def test_parents_common(self): # Relative @@ -433,6 +472,27 @@ def test_parents_common(self): par[-4] with self.assertRaises(IndexError): par[3] + # Trailing sep + p = P('/a/b/c/') + par = p.parents + self.assertEqual(len(par), 3) + self.assertEqual(par[0], P('/a/b')) + self.assertEqual(par[1], P('/a')) + self.assertEqual(par[2], P('/')) + self.assertEqual(par[-1], P('/')) + self.assertEqual(par[-2], P('/a')) + self.assertEqual(par[-3], P('/a/b')) + self.assertEqual(par[0:1], (P('/a/b'),)) + self.assertEqual(par[:2], (P('/a/b'), P('/a'))) + self.assertEqual(par[:-1], (P('/a/b'), P('/a'))) + self.assertEqual(par[1:], (P('/a'), P('/'))) + self.assertEqual(par[::2], (P('/a/b'), P('/'))) + self.assertEqual(par[::-1], (P('/'), 
P('/a'), P('/a/b'))) + self.assertEqual(list(par), [P('/a/b'), P('/a'), P('/')]) + with self.assertRaises(IndexError): + par[-4] + with self.assertRaises(IndexError): + par[3] def test_drive_common(self): P = self.cls @@ -462,10 +522,13 @@ def test_name_common(self): self.assertEqual(P('.').name, '') self.assertEqual(P('/').name, '') self.assertEqual(P('a/b').name, 'b') + self.assertEqual(P('a/b/').name, 'b') self.assertEqual(P('/a/b').name, 'b') + self.assertEqual(P('/a/b/').name, 'b') self.assertEqual(P('/a/b/.').name, 'b') self.assertEqual(P('a/b.py').name, 'b.py') self.assertEqual(P('/a/b.py').name, 'b.py') + self.assertEqual(P('/etc/cron.d/').name, 'cron.d') def test_suffix_common(self): P = self.cls @@ -474,7 +537,9 @@ def test_suffix_common(self): self.assertEqual(P('..').suffix, '') self.assertEqual(P('/').suffix, '') self.assertEqual(P('a/b').suffix, '') + self.assertEqual(P('a/b/').suffix, '') self.assertEqual(P('/a/b').suffix, '') + self.assertEqual(P('/a/b/').suffix, '') self.assertEqual(P('/a/b/.').suffix, '') self.assertEqual(P('a/b.py').suffix, '.py') self.assertEqual(P('/a/b.py').suffix, '.py') @@ -486,6 +551,7 @@ def test_suffix_common(self): self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('/a/Some name. 
Ending with a dot.').suffix, '') + self.assertEqual(P('/etc/cron.d/').suffix, '.d') def test_suffixes_common(self): P = self.cls @@ -493,7 +559,9 @@ def test_suffixes_common(self): self.assertEqual(P('.').suffixes, []) self.assertEqual(P('/').suffixes, []) self.assertEqual(P('a/b').suffixes, []) + self.assertEqual(P('a/b/').suffixes, []) self.assertEqual(P('/a/b').suffixes, []) + self.assertEqual(P('/a/b/').suffixes, []) self.assertEqual(P('/a/b/.').suffixes, []) self.assertEqual(P('a/b.py').suffixes, ['.py']) self.assertEqual(P('/a/b.py').suffixes, ['.py']) @@ -505,6 +573,7 @@ def test_suffixes_common(self): self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('/etc/cron.d/').suffixes, ['.d']) def test_stem_common(self): P = self.cls @@ -513,12 +582,14 @@ def test_stem_common(self): self.assertEqual(P('..').stem, '..') self.assertEqual(P('/').stem, '') self.assertEqual(P('a/b').stem, 'b') + self.assertEqual(P('a/b/').stem, 'b') self.assertEqual(P('a/b.py').stem, 'b') self.assertEqual(P('a/.hgrc').stem, '.hgrc') self.assertEqual(P('a/.hg.rc').stem, '.hg') self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('a/Some name. Ending with a dot.').stem, 'Some name. 
Ending with a dot.') + self.assertEqual(P('/etc/cron.d/').stem, 'cron') def test_with_name_common(self): P = self.cls @@ -528,6 +599,7 @@ def test_with_name_common(self): self.assertEqual(P('/a/b.py').with_name('d.xml'), P('/a/d.xml')) self.assertEqual(P('a/Dot ending.').with_name('d.xml'), P('a/d.xml')) self.assertEqual(P('/a/Dot ending.').with_name('d.xml'), P('/a/d.xml')) + self.assertEqual(P('/etc/cron.d/').with_name('tron.g'), P('/etc/tron.g/')) self.assertRaises(ValueError, P('').with_name, 'd.xml') self.assertRaises(ValueError, P('.').with_name, 'd.xml') self.assertRaises(ValueError, P('/').with_name, 'd.xml') @@ -546,6 +618,7 @@ def test_with_stem_common(self): self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) + self.assertEqual(P('/etc/cron.d/').with_stem('tron'), P('/etc/tron.d/')) self.assertRaises(ValueError, P('').with_stem, 'd') self.assertRaises(ValueError, P('.').with_stem, 'd') self.assertRaises(ValueError, P('/').with_stem, 'd') @@ -561,9 +634,11 @@ def test_with_suffix_common(self): self.assertEqual(P('/a/b').with_suffix('.gz'), P('/a/b.gz')) self.assertEqual(P('a/b.py').with_suffix('.gz'), P('a/b.gz')) self.assertEqual(P('/a/b.py').with_suffix('.gz'), P('/a/b.gz')) + self.assertEqual(P('/etc/cron.d/').with_suffix('.g'), P('/etc/cron.g/')) # Stripping suffix. self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + self.assertEqual(P('/etc/cron.d/').with_suffix(''), P('/etc/cron/')) # Path doesn't have a "filename" component. 
self.assertRaises(ValueError, P('').with_suffix, '.gz') self.assertRaises(ValueError, P('.').with_suffix, '.gz') @@ -657,6 +732,25 @@ def test_relative_to_common(self): self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) + def test_relative_to_trailing_sep(self): + P = self.cls + self.assertEqual(P('foo').relative_to('foo'), P()) + self.assertEqual(P('foo').relative_to('foo/'), P()) + self.assertEqual(P('foo/').relative_to('foo'), P()) + self.assertEqual(P('foo/').relative_to('foo/'), P()) + self.assertEqual(P('foo/bar').relative_to('foo'), P('bar')) + self.assertEqual(P('foo/bar').relative_to('foo/'), P('bar')) + self.assertEqual(P('foo/bar/').relative_to('foo'), P('bar/')) + self.assertEqual(P('foo/bar/').relative_to('foo/'), P('bar/')) + self.assertEqual(P('foo').relative_to('foo/bar', walk_up=True), P('..')) + self.assertEqual(P('foo').relative_to('foo/bar/', walk_up=True), P('..')) + self.assertEqual(P('foo/').relative_to('foo/bar', walk_up=True), P('../')) + self.assertEqual(P('foo/').relative_to('foo/bar/', walk_up=True), P('../')) + self.assertEqual(P('foo/oof').relative_to('foo/bar', walk_up=True), P('../oof')) + self.assertEqual(P('foo/oof').relative_to('foo/bar/', walk_up=True), P('../oof')) + self.assertEqual(P('foo/oof/').relative_to('foo/bar', walk_up=True), P('../oof/')) + self.assertEqual(P('foo/oof/').relative_to('foo/bar/', walk_up=True), P('../oof/')) + def test_is_relative_to_common(self): P = self.cls p = P('a/b') @@ -692,6 +786,25 @@ def test_is_relative_to_common(self): self.assertFalse(p.is_relative_to('')) self.assertFalse(p.is_relative_to(P('a'))) + def test_is_relative_to_trailing_sep(self): + P = self.cls + self.assertTrue(P('foo').is_relative_to('foo')) + self.assertTrue(P('foo').is_relative_to('foo/')) + self.assertTrue(P('foo/').is_relative_to('foo')) + self.assertTrue(P('foo/').is_relative_to('foo/')) + 
self.assertTrue(P('foo/bar').is_relative_to('foo')) + self.assertTrue(P('foo/bar').is_relative_to('foo/')) + self.assertTrue(P('foo/bar/').is_relative_to('foo')) + self.assertTrue(P('foo/bar/').is_relative_to('foo/')) + self.assertFalse(P('foo').is_relative_to('foo/bar')) + self.assertFalse(P('foo').is_relative_to('foo/bar/')) + self.assertFalse(P('foo/').is_relative_to('foo/bar')) + self.assertFalse(P('foo/').is_relative_to('foo/bar/')) + self.assertFalse(P('foo/oof').is_relative_to('foo/bar')) + self.assertFalse(P('foo/oof').is_relative_to('foo/bar/')) + self.assertFalse(P('foo/oof/').is_relative_to('foo/bar')) + self.assertFalse(P('foo/oof/').is_relative_to('foo/bar/')) + def test_pickling_common(self): P = self.cls p = P('/a/b') @@ -777,7 +890,7 @@ def test_as_uri_common(self): P().as_uri() def test_repr_roundtrips(self): - for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): + for pathstr in ('a', 'a/', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c', 'a/b/c/'): with self.subTest(pathstr=pathstr): p = self.cls(pathstr) r = repr(p) @@ -965,14 +1078,20 @@ def test_drive_root_parts(self): def test_str(self): p = self.cls('a/b/c') self.assertEqual(str(p), 'a\\b\\c') + p = self.cls('a/b/c/') + self.assertEqual(str(p), 'a\\b\\c\\') p = self.cls('c:/a/b/c') self.assertEqual(str(p), 'c:\\a\\b\\c') + p = self.cls('c:/a/b/c/') + self.assertEqual(str(p), 'c:\\a\\b\\c\\') p = self.cls('//a/b') self.assertEqual(str(p), '\\\\a\\b\\') p = self.cls('//a/b/c') self.assertEqual(str(p), '\\\\a\\b\\c') p = self.cls('//a/b/c/d') self.assertEqual(str(p), '\\\\a\\b\\c\\d') + p = self.cls('//a/b/c/d/') + self.assertEqual(str(p), '\\\\a\\b\\c\\d\\') def test_str_subclass(self): self._check_str_subclass('.\\a:b') @@ -990,6 +1109,7 @@ def test_eq(self): P = self.cls self.assertEqual(P('c:a/b'), P('c:a/b')) self.assertEqual(P('c:a/b'), P('c:', 'a', 'b')) + self.assertNotEqual(P('c:a/b'), P('c:a/b/')) self.assertNotEqual(P('c:a/b'), P('d:a/b')) self.assertNotEqual(P('c:a/b'), P('c:/a/b')) 
self.assertNotEqual(P('/a/b'), P('c:/a/b')) @@ -1070,6 +1190,10 @@ def test_parts(self): p = P('//a/b/c/d') parts = p.parts self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) + # Trailing sep + p = P('c:/a/b/') + parts = p.parts + self.assertEqual(parts, ('c:\\', 'a', 'b', '')) def test_parent(self): # Anchored @@ -1088,11 +1212,17 @@ def test_parent(self): self.assertEqual(p.parent, P('//a/b/c')) self.assertEqual(p.parent.parent, P('//a/b')) self.assertEqual(p.parent.parent.parent, P('//a/b')) + # Trailing sep + p = P('z:/a/b/c/') + self.assertEqual(p.parent, P('z:/a/b')) + self.assertEqual(p.parent.parent, P('z:/a')) + self.assertEqual(p.parent.parent.parent, P('z:/')) + self.assertEqual(p.parent.parent.parent.parent, P('z:/')) def test_parents(self): # Anchored P = self.cls - p = P('z:a/b/') + p = P('z:a/b') par = p.parents self.assertEqual(len(par), 2) self.assertEqual(par[0], P('z:a')) @@ -1106,7 +1236,7 @@ def test_parents(self): self.assertEqual(list(par), [P('z:a'), P('z:')]) with self.assertRaises(IndexError): par[2] - p = P('z:/a/b/') + p = P('z:/a/b') par = p.parents self.assertEqual(len(par), 2) self.assertEqual(par[0], P('z:/a')) @@ -1134,6 +1264,21 @@ def test_parents(self): self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) with self.assertRaises(IndexError): par[2] + # Trailing sep + p = P('z:/a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:/a')) + self.assertEqual(par[1], P('z:/')) + self.assertEqual(par[0:1], (P('z:/a'),)) + self.assertEqual(par[0:-1], (P('z:/a'),)) + self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) + self.assertEqual(par[1:], (P('z:/'),)) + self.assertEqual(par[::2], (P('z:/a'),)) + self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) + self.assertEqual(list(par), [P('z:/a'), P('z:/')]) + with self.assertRaises(IndexError): + par[2] def test_drive(self): P = self.cls @@ -1171,18 +1316,23 @@ def test_name(self): self.assertEqual(P('c:').name, '') self.assertEqual(P('c:/').name, '') 
self.assertEqual(P('c:a/b').name, 'b') + self.assertEqual(P('c:a/b/').name, 'b') self.assertEqual(P('c:/a/b').name, 'b') + self.assertEqual(P('c:/a/b/').name, 'b') self.assertEqual(P('c:a/b.py').name, 'b.py') self.assertEqual(P('c:/a/b.py').name, 'b.py') self.assertEqual(P('//My.py/Share.php').name, '') self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') + self.assertEqual(P('c:/etc/cron.d/').name, 'cron.d') def test_suffix(self): P = self.cls self.assertEqual(P('c:').suffix, '') self.assertEqual(P('c:/').suffix, '') self.assertEqual(P('c:a/b').suffix, '') + self.assertEqual(P('c:a/b/').suffix, '') self.assertEqual(P('c:/a/b').suffix, '') + self.assertEqual(P('c:/a/b/').suffix, '') self.assertEqual(P('c:a/b.py').suffix, '.py') self.assertEqual(P('c:/a/b.py').suffix, '.py') self.assertEqual(P('c:a/.hgrc').suffix, '') @@ -1195,13 +1345,16 @@ def test_suffix(self): self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') + self.assertEqual(P('c:/etc/cron.d/').suffix, '.d') def test_suffixes(self): P = self.cls self.assertEqual(P('c:').suffixes, []) self.assertEqual(P('c:/').suffixes, []) self.assertEqual(P('c:a/b').suffixes, []) + self.assertEqual(P('c:a/b/').suffixes, []) self.assertEqual(P('c:/a/b').suffixes, []) + self.assertEqual(P('c:/a/b/').suffixes, []) self.assertEqual(P('c:a/b.py').suffixes, ['.py']) self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) self.assertEqual(P('c:a/.hgrc').suffixes, []) @@ -1214,6 +1367,7 @@ def test_suffixes(self): self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:/a/Some name. 
Ending with a dot.').suffixes, []) + self.assertEqual(P('c:/etc/cron.d/').suffixes, ['.d']) def test_stem(self): P = self.cls @@ -1222,12 +1376,14 @@ def test_stem(self): self.assertEqual(P('c:..').stem, '..') self.assertEqual(P('c:/').stem, '') self.assertEqual(P('c:a/b').stem, 'b') + self.assertEqual(P('c:a/b/').stem, 'b') self.assertEqual(P('c:a/b.py').stem, 'b') self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') self.assertEqual(P('c:a/.hg.rc').stem, '.hg') self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, 'Some name. Ending with a dot.') + self.assertEqual(P('c:/etc/cron.d/').stem, 'cron') def test_with_name(self): P = self.cls @@ -1235,6 +1391,7 @@ def test_with_name(self): self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) self.assertEqual(P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) self.assertEqual(P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) + self.assertEqual(P('c:/etc/cron.d/').with_name('tron.g'), P('c:/etc/tron.g/')) self.assertRaises(ValueError, P('c:').with_name, 'd.xml') self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') @@ -1251,6 +1408,7 @@ def test_with_stem(self): self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) + self.assertEqual(P('c:/etc/cron.d/').with_stem('tron'), P('c:/etc/tron.d/')) self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') @@ -1267,6 +1425,7 @@ def test_with_suffix(self): self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) + 
self.assertEqual(P('c:/etc/cron.d/').with_suffix('.g'), P('c:/etc/cron.g/')) # Path doesn't have a "filename" component. self.assertRaises(ValueError, P('').with_suffix, '.gz') self.assertRaises(ValueError, P('.').with_suffix, '.gz') From 5d87cf400d0aa49344cef8aacc0f7f89423eeee1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Dec 2023 00:43:59 +0000 Subject: [PATCH 09/23] Docstrings --- Lib/pathlib.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index e64191a0af776d..0f1b03f5f27f32 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -213,13 +213,13 @@ class PurePath: # in the `__init__()` method. '_raw_paths', - # The `_drv`, `_root` and `_tail_cached` slots store parsed and - # normalized parts of the path. They are set when any of the `drive`, - # `root` or `_tail` properties are accessed for the first time. The - # three-part division corresponds to the result of - # `os.path.splitroot()`, except that the tail is further split on path - # separators (i.e. it is a list of strings), and that the root and - # tail are normalized. + # The `_drv`, `_root`, `_tail_cached` and `_has_trailing_sep` slots + # store parsed and normalized parts of the path. They are set when any + # of the `drive`, `root`, `_tail` or `has_trailing_sep` properties are + # accessed for the first time. The three-part division corresponds to + # the result of `os.path.splitroot()`, except that the tail is further + # split on path separators (i.e. it is a list of strings), and that + # the root and tail are normalized. 
'_drv', '_root', '_tail_cached', '_has_trailing_sep', # The `_str` slot stores the string representation of the path, @@ -358,6 +358,7 @@ def _tail(self): @property def has_trailing_sep(self): + """True if the path has a trailing slash after its name.""" try: return self._has_trailing_sep except AttributeError: @@ -443,16 +444,19 @@ def with_suffix(self, suffix): raise ValueError(f"Invalid suffix {suffix!r}") def without_trailing_sep(self): + """Return a new path without a trailing slash after its name.""" if not self.has_trailing_sep: return self return self._from_parsed_parts(self.drive, self.root, self._tail, False) def with_trailing_sep(self): + """Return a new path with a trailing slash after its name. If the + path has no name, ValueError is raised.""" if self.has_trailing_sep: return self tail = self._tail if not tail: - raise ValueError('empty name') + raise ValueError(f"{self!r} has an empty name") return self._from_parsed_parts(self.drive, self.root, tail, True) def relative_to(self, other, /, *_deprecated, walk_up=False): From af12c24bb4f2e27c1a834cc84b3f6f64f399cf37 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Dec 2023 01:34:37 +0000 Subject: [PATCH 10/23] Docs --- Doc/library/pathlib.rst | 65 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 7ecfd120db8d15..4947f88d8785b9 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -145,6 +145,15 @@ we also call *flavours*: >>> PureWindowsPath('c:/Windows', '/Program Files') PureWindowsPath('c:/Program Files') + At most one trailing slash is kept:: + + >>> PurePath('foo//') + PurePosixPath('foo/') + + .. versionchanged:: 3.13 + A trailing slash is now retained, as it is meaningful to path + resolution. 
+ Spurious slashes and single dots are collapsed, but double dots (``'..'``) and leading double slashes (``'//'``) are not, since this would change the meaning of a path for various reasons (e.g. symbolic links, UNC paths):: @@ -153,8 +162,8 @@ we also call *flavours*: PurePosixPath('foo/bar') >>> PurePath('//foo/bar') PurePosixPath('//foo/bar') - >>> PurePath('foo/./bar') - PurePosixPath('foo/bar') + >>> PurePath('foo/./bar/.') + PurePosixPath('foo/bar/') >>> PurePath('foo/../bar') PurePosixPath('foo/../bar') @@ -184,7 +193,7 @@ we also call *flavours*: filesystem paths, including `UNC paths`_:: >>> PureWindowsPath('c:/Program Files/') - PureWindowsPath('c:/Program Files') + PureWindowsPath('c:/Program Files/') >>> PureWindowsPath('//server/share/file') PureWindowsPath('//server/share/file') @@ -376,6 +385,21 @@ Pure paths provide the following methods and properties: '\\\\host\\share\\' +.. attribute:: PurePath.has_trailing_sep + + Whether the path has a trailing slash after its :attr:`name`:: + + >>> PurePosixPath('foo/bar/').has_trailing_sep + True + + If the path has no name, this property is false:: + + >>> PureWindowsPath('c:/').has_trailing_sep + False + + .. versionadded:: 3.13 + + .. attribute:: PurePath.parents An immutable sequence providing access to the logical ancestors of @@ -716,6 +740,41 @@ Pure paths provide the following methods and properties: PureWindowsPath('README') +.. method:: PurePath.with_trailing_sep() + + Return a new path with a trailing slash after its :attr:`name`. 
If the + original path doesn't have a name, :exc:`ValueError` is raised:: + + >>> p = PureWindowsPath('c:/windows') + >>> p.with_trailing_sep() + PureWindowsPath('c:/windows/') + >>> p = PureWindowsPath('c:/') + >>> p.with_trailing_sep() + Traceback (most recent call last): + File "<stdin>", line 1, in <module> + p.with_trailing_sep() + ~~~~~~~~~~~~~~~~~~~^^ + File "/home/barney/projects/cpython/Lib/pathlib.py", line 459, in with_trailing_sep + raise ValueError(f"{self!r} has an empty name") + ValueError: PureWindowsPath('c:/') has an empty name + + .. versionadded:: 3.13 + + +.. method:: PurePath.without_trailing_sep() + + Return a new path without a slash after its :attr:`name`, if any:: + + >>> p = PureWindowsPath('c:/windows/') + >>> p.without_trailing_sep() + PureWindowsPath('c:/windows') + >>> p = PureWindowsPath('c:/') + >>> p.without_trailing_sep() + PureWindowsPath('c:/') + + .. versionadded:: 3.13 + + .. method:: PurePath.with_segments(*pathsegments) Create a new path object of the same type by combining the given From cae377495adfc2937d88e611cab85500844b4596 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Dec 2023 01:34:41 +0000 Subject: [PATCH 11/23] Ensure has_trailing_sep is boolean --- Lib/pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 0f1b03f5f27f32..921c46098b7b5e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -275,7 +275,7 @@ def _parse_path(cls, path): # e.g. 
//?/unc/server/share root = sep parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.'] - has_trailing_sep = parsed and not rel.endswith(parsed[-1]) + has_trailing_sep = bool(parsed) and not rel.endswith(parsed[-1]) return drv, root, parsed, has_trailing_sep def _load_parts(self): From d4c87c6462d7975d00374ce5995ba00a8c67fcb1 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Dec 2023 02:02:32 +0000 Subject: [PATCH 12/23] Add tests for new methods/properties --- Lib/pathlib.py | 12 +++++------ Lib/test/test_pathlib.py | 45 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 921c46098b7b5e..826556c98a0ac1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -443,12 +443,6 @@ def with_suffix(self, suffix): else: raise ValueError(f"Invalid suffix {suffix!r}") - def without_trailing_sep(self): - """Return a new path without a trailing slash after its name.""" - if not self.has_trailing_sep: - return self - return self._from_parsed_parts(self.drive, self.root, self._tail, False) - def with_trailing_sep(self): """Return a new path with a trailing slash after its name. If the path has no name, ValueError is raised.""" @@ -459,6 +453,12 @@ def with_trailing_sep(self): raise ValueError(f"{self!r} has an empty name") return self._from_parsed_parts(self.drive, self.root, tail, True) + def without_trailing_sep(self): + """Return a new path without a trailing slash after its name.""" + if not self.has_trailing_sep: + return self + return self._from_parsed_parts(self.drive, self.root, self._tail, False) + def relative_to(self, other, /, *_deprecated, walk_up=False): """Return the relative path to another path identified by the passed arguments. 
If the operation is not possible (because this is not diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 1d337f7abea6b2..c3c8a808577250 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -653,6 +653,51 @@ def test_with_suffix_common(self): self.assertRaises(ValueError, P('a/b').with_suffix, './.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.d/.') + def test_has_trailing_sep(self): + P = self.cls + self.assertFalse(P().has_trailing_sep) + self.assertFalse(P('').has_trailing_sep) + self.assertFalse(P('.').has_trailing_sep) + self.assertFalse(P('a').has_trailing_sep) + self.assertTrue(P('a/').has_trailing_sep) + self.assertFalse(P('a/b').has_trailing_sep) + self.assertTrue(P('a/b/').has_trailing_sep) + self.assertFalse(P('/').has_trailing_sep) + self.assertFalse(P('/a').has_trailing_sep) + self.assertTrue(P('/a/').has_trailing_sep) + self.assertFalse(P('/a/b').has_trailing_sep) + self.assertTrue(P('/a/b/').has_trailing_sep) + + def test_with_trailing_sep(self): + P = self.cls + self.assertRaises(ValueError, P().with_trailing_sep) + self.assertRaises(ValueError, P('').with_trailing_sep) + self.assertRaises(ValueError, P('.').with_trailing_sep) + self.assertEqual(P('a/'), P('a').with_trailing_sep()) + self.assertEqual(P('a/'), P('a/').with_trailing_sep()) + self.assertEqual(P('a/b/'), P('a/b').with_trailing_sep()) + self.assertEqual(P('a/b/'), P('a/b/').with_trailing_sep()) + self.assertRaises(ValueError, P('/').with_trailing_sep) + self.assertEqual(P('/a/'), P('/a').with_trailing_sep()) + self.assertEqual(P('/a/'), P('/a/').with_trailing_sep()) + self.assertEqual(P('/a/b/'), P('/a/b').with_trailing_sep()) + self.assertEqual(P('/a/b/'), P('/a/b/').with_trailing_sep()) + + def test_without_trailing_sep(self): + P = self.cls + self.assertEqual(P(), P().without_trailing_sep()) + self.assertEqual(P(), P('').without_trailing_sep()) + self.assertEqual(P(), P('.').without_trailing_sep()) + self.assertEqual(P('a'), 
P('a').without_trailing_sep()) + self.assertEqual(P('a'), P('a/').without_trailing_sep()) + self.assertEqual(P('a/b'), P('a/b').without_trailing_sep()) + self.assertEqual(P('a/b'), P('a/b/').without_trailing_sep()) + self.assertEqual(P('/'), P('/').without_trailing_sep()) + self.assertEqual(P('/a'), P('/a').without_trailing_sep()) + self.assertEqual(P('/a'), P('/a/').without_trailing_sep()) + self.assertEqual(P('/a/b'), P('/a/b').without_trailing_sep()) + self.assertEqual(P('/a/b'), P('/a/b/').without_trailing_sep()) + def test_relative_to_common(self): P = self.cls p = P('a/b') From 1a77c8a745c4121d926d428b4cdba03b1ef122aa Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 4 Dec 2023 03:48:07 +0000 Subject: [PATCH 13/23] Fix WindowsPath('C:').absolute() --- Lib/pathlib.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 826556c98a0ac1..7e8c869031b41e 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1464,6 +1464,8 @@ def absolute(self): elif self.drive: # There is a CWD on each drive-letter drive. cwd = os.path.abspath(self.drive) + if not self._tail: + return self.with_segments(cwd) else: cwd = os.getcwd() # Fast path for "empty" paths, e.g. Path("."), Path("") or Path(). 
From df79c49ad49c8110823ec6a311265d4508cf7f24 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 7 Dec 2023 04:51:40 +0000 Subject: [PATCH 14/23] Don't access `.parts` from `._glob()` --- Lib/pathlib.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f863bc277187f6..b66a81d4067526 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1069,7 +1069,9 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): elif not path_pattern._tail: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - pattern_parts = list(path_pattern.parts) + sep = self.pathmod.sep + pattern = str(path_pattern) + pattern_parts = pattern.split(sep) if pattern_parts[-1] == '**': # GH-70303: '**' only matches directories. Add trailing slash. warnings.warn( @@ -1092,7 +1094,6 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): # do not perform any filesystem access, which can be much faster! filter_paths = follow_symlinks is not None and '..' not in pattern_parts deduplicate_paths = False - sep = self.pathmod.sep if not self.is_dir(): paths = iter([]) elif not self._tail: @@ -1120,7 +1121,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): # Filter out paths that don't match pattern. 
prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(str(path_pattern), sep, case_sensitive) + match = _compile_pattern(pattern, sep, case_sensitive) paths = (path for path in paths if match(str(path), prefix_len)) return paths From 0f75804f52d2754c652e3faa4b7711cef374ab48 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 7 Dec 2023 04:54:09 +0000 Subject: [PATCH 15/23] Fix pickle roundtripping --- Lib/pathlib.py | 4 +--- Lib/test/test_pathlib.py | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b66a81d4067526..469e709e361f81 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -643,9 +643,7 @@ def __init__(self, *args): self._resolving = False def __reduce__(self): - # Using the parts tuple helps share interned path parts - # when pickling related paths. - return (self.__class__, self.parts) + return (self.__class__, tuple(self._raw_paths)) def __fspath__(self): return str(self) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 55dc6b4158ef4c..13cbc37cd63d37 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -855,14 +855,16 @@ def test_is_relative_to_trailing_sep(self): def test_pickling_common(self): P = self.cls - p = P('/a/b') - for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): - dumped = pickle.dumps(p, proto) - pp = pickle.loads(dumped) - self.assertIs(pp.__class__, p.__class__) - self.assertEqual(pp, p) - self.assertEqual(hash(pp), hash(p)) - self.assertEqual(str(pp), str(p)) + for pathstr in ('a', 'a/', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c', 'a/b/c/'): + with self.subTest(pathstr=pathstr): + p = P(pathstr) + for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): + dumped = pickle.dumps(p, proto) + pp = pickle.loads(dumped) + self.assertIs(pp.__class__, p.__class__) + self.assertEqual(pp, p) + self.assertEqual(hash(pp), hash(p)) + self.assertEqual(str(pp), str(p)) def test_fspath_common(self): P = self.cls From 
eeb35ffa0472ca09364141064656eb1f786ccc7c Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 7 Dec 2023 04:58:25 +0000 Subject: [PATCH 16/23] Undo changes to `parts` --- Lib/pathlib.py | 8 +++----- Lib/test/test_pathlib.py | 16 ++++------------ 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 469e709e361f81..f181a4ce33a5e1 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -507,12 +507,10 @@ def is_relative_to(self, other, /, *_deprecated): def parts(self): """An object providing sequence-like access to the components in the filesystem path.""" - result = tuple(self._tail) if self.drive or self.root: - result = (self.drive + self.root,) + result - if self.has_trailing_sep: - result = result + ('',) - return result + return (self.drive + self.root,) + tuple(self._tail) + else: + return tuple(self._tail) def joinpath(self, *pathsegments): """Combine this path with one or several arguments, and return a diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 13cbc37cd63d37..442026b94066e3 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -180,11 +180,11 @@ def test_drive_root_parts_common(self): # Unanchored parts. check((), '', '', ()) check(('a',), '', '', ('a',)) - check(('a/',), '', '', ('a', '')) + check(('a/',), '', '', ('a',)) check(('a', 'b'), '', '', ('a', 'b')) # Expansion. check(('a/b',), '', '', ('a', 'b')) - check(('a/b/',), '', '', ('a', 'b', '')) + check(('a/b/',), '', '', ('a', 'b')) check(('a', 'b/c', 'd'), '', '', ('a', 'b', 'c', 'd')) # Collapsing and stripping excess slashes. check(('a', 'b//c', 'd'), '', '', ('a', 'b', 'c', 'd')) @@ -193,7 +193,7 @@ def test_drive_root_parts_common(self): check(('.',), '', '', ()) check(('.', '.', 'b'), '', '', ('b',)) check(('a', '.', 'b'), '', '', ('a', 'b')) - check(('a', '.', '.'), '', '', ('a', '')) + check(('a', '.', '.'), '', '', ('a',)) # The first part is anchored. 
check(('/a/b',), '', sep, (sep, 'a', 'b')) check(('/a', 'b'), '', sep, (sep, 'a', 'b')) @@ -385,10 +385,6 @@ def test_parts_common(self): p = P('/a/b') parts = p.parts self.assertEqual(parts, (sep, 'a', 'b')) - # When the path has a trailing separator, an additional empty part is present. - p = P('a/b/') - parts = p.parts - self.assertEqual(parts, ('a', 'b', '')) def test_equivalences(self): for k, tuples in self.equivalences.items(): @@ -1085,7 +1081,7 @@ def test_drive_root_parts(self): # UNC paths. check(('a', '//b/c', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) # Collapsing and stripping excess slashes. - check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd', '')) + check(('a', 'Z://b//c/', 'd/'), 'Z:', '\\', ('Z:\\', 'b', 'c', 'd')) # UNC paths. check(('a', '//b/c//', 'd'), '\\\\b\\c', '\\', ('\\\\b\\c\\', 'd')) # Extended paths. @@ -1240,10 +1236,6 @@ def test_parts(self): p = P('//a/b/c/d') parts = p.parts self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) - # Trailing sep - p = P('c:/a/b/') - parts = p.parts - self.assertEqual(parts, ('c:\\', 'a', 'b', '')) def test_parent(self): # Anchored From b852339e2f1ed74b5a03ce454cee5b13efed508d Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 7 Dec 2023 23:12:08 +0000 Subject: [PATCH 17/23] Simplify `_glob()` slightly --- Lib/pathlib.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index f181a4ce33a5e1..b848bcd33b2c06 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1065,21 +1065,21 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): elif not path_pattern._tail: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - sep = self.pathmod.sep - pattern = str(path_pattern) - pattern_parts = pattern.split(sep) - if pattern_parts[-1] == '**': + if path_pattern.name == '**' and not path_pattern.has_trailing_sep: # GH-70303: '**' only matches directories. Add trailing slash. 
warnings.warn( "Pattern ending '**' will match files and directories in a " "future Python release. Add a trailing slash to match only " "directories and remove this warning.", FutureWarning, 3) - pattern_parts.append('') + path_pattern = path_pattern.with_trailing_sep() if case_sensitive is None: # TODO: evaluate case-sensitivity of each directory in _select_children(). case_sensitive = _is_case_sensitive(self.pathmod) + sep = self.pathmod.sep + pattern_str = str(path_pattern) + pattern_parts = pattern_str.split(sep) # If symlinks are handled consistently, and the pattern does not # contain '..' components, then we can use a 'walk-and-match' strategy @@ -1117,7 +1117,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): # Filter out paths that don't match pattern. prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(pattern, sep, case_sensitive) + match = _compile_pattern(pattern_str, sep, case_sensitive) paths = (path for path in paths if match(str(path), prefix_len)) return paths From 2b77a61faf44ccce7fbc92279fcf3a3d0ba034f5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 7 Dec 2023 23:17:19 +0000 Subject: [PATCH 18/23] Fix possible scoping issue in `_glob()` --- Lib/pathlib.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index b848bcd33b2c06..644a784bbcbbc9 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -92,6 +92,12 @@ def _compile_pattern(pat, sep, case_sensitive): return re.compile(regex, flags=flags).match +def _select_parents(paths, dir_only): + """Yield lexical parents of the given paths.""" + for path in paths: + yield path._make_child_relpath('..', dir_only) + + def _select_children(parent_paths, dir_only, follow_symlinks, match): """Yield direct children of given paths, filtering by name and type.""" if follow_symlinks is None: @@ -1105,7 +1111,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): pass elif part == '..': dir_only = 
part_idx < len(pattern_parts) - paths = (path._make_child_relpath('..', dir_only) for path in paths) + paths = _select_parents(paths, dir_only) elif part == '**': # Consume adjacent '**' components. while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**': From 7b6766f59c0f7efa3ab7bde42f1c6649ec25f960 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 8 Dec 2023 14:11:45 +0000 Subject: [PATCH 19/23] Formatting --- Lib/pathlib.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 644a784bbcbbc9..4423c6d9586896 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -219,14 +219,18 @@ class PurePath: # in the `__init__()` method. '_raw_paths', - # The `_drv`, `_root`, `_tail_cached` and `_has_trailing_sep` slots - # store parsed and normalized parts of the path. They are set when any - # of the `drive`, `root`, `_tail` or `has_trailing_sep` properties are - # accessed for the first time. The three-part division corresponds to - # the result of `os.path.splitroot()`, except that the tail is further - # split on path separators (i.e. it is a list of strings), and that - # the root and tail are normalized. - '_drv', '_root', '_tail_cached', '_has_trailing_sep', + # The `_drv`, `_root` and `_tail_cached` slots store parsed and + # normalized parts of the path. They are set when any of the `drive`, + # `root` or `_tail` properties are accessed for the first time. The + # three-part division corresponds to the result of + # `os.path.splitroot()`, except that the tail is further split on path + # separators (i.e. it is a list of strings), and that the root and + # tail are normalized. + '_drv', '_root', '_tail_cached', + + # The `_has_trailing_sep` slot stores a boolean indicating whether a + # trailing slash follows the path name. 
+ '_has_trailing_sep', # The `_str` slot stores the string representation of the path, # computed from the drive, root and tail when `__str__()` is called @@ -465,6 +469,12 @@ def without_trailing_sep(self): return self return self._from_parsed_parts(self.drive, self.root, self._tail, False) + @property + def _ancestors(self): + """Yields this path (sans trailing slash) and its parents.""" + yield self.without_trailing_sep() + yield from _PathParents(self) + def relative_to(self, other, /, *_deprecated, walk_up=False): """Return the relative path to another path identified by the passed arguments. If the operation is not possible (because this is not @@ -557,11 +567,6 @@ def parents(self): # as doing so would introduce a reference cycle. return _PathParents(self) - @property - def _ancestors(self): - yield self.without_trailing_sep() - yield from _PathParents(self) - def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" @@ -1070,8 +1075,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): raise NotImplementedError("Non-relative patterns are unsupported") elif not path_pattern._tail: raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - - if path_pattern.name == '**' and not path_pattern.has_trailing_sep: + elif path_pattern.name == '**' and not path_pattern.has_trailing_sep: # GH-70303: '**' only matches directories. Add trailing slash. warnings.warn( "Pattern ending '**' will match files and directories in a " @@ -1083,6 +1087,7 @@ def _glob(self, pattern, case_sensitive, follow_symlinks): if case_sensitive is None: # TODO: evaluate case-sensitivity of each directory in _select_children(). 
case_sensitive = _is_case_sensitive(self.pathmod) + sep = self.pathmod.sep pattern_str = str(path_pattern) pattern_parts = pattern_str.split(sep) From f5c226577f66c24349d9127c1fb5dbb85bfde0a4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 8 Dec 2023 14:21:55 +0000 Subject: [PATCH 20/23] Add test cases for preserving slash in absolute() and expanduser() --- Lib/test/test_pathlib.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index cc1066036d29a4..8f51e41cca93f1 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -3039,6 +3039,9 @@ def test_absolute_common(self): self.assertEqual(str(P('a', '..').absolute()), os.path.join(BASE, 'a', '..')) self.assertEqual(str(P('..', 'b').absolute()), os.path.join(BASE, '..', 'b')) + # Trailing slash should be preserved + self.assertEqual(str(P('a/').absolute()), os.path.join(BASE, 'a', '')) + def _test_home(self, p): q = self.cls(os.path.expanduser('~')) self.assertEqual(p, q) @@ -3066,6 +3069,12 @@ def test_expanduser_common(self): P = self.cls p = P('~') self.assertEqual(p.expanduser(), P(os.path.expanduser('~'))) + p = P('~/') + self.assertEqual(p.expanduser(), P(os.path.expanduser('~/'))) + p = P('~/foo') + self.assertEqual(p.expanduser(), P(os.path.expanduser('~/foo'))) + p = P('~/foo/') + self.assertEqual(p.expanduser(), P(os.path.expanduser('~/foo/'))) p = P('foo') self.assertEqual(p.expanduser(), p) p = P('/~') @@ -3845,10 +3854,12 @@ def test_absolute(self): # Relative path with root self.assertEqual(str(P('\\').absolute()), drive + '\\') self.assertEqual(str(P('\\foo').absolute()), drive + '\\foo') + self.assertEqual(str(P('\\foo\\').absolute()), drive + '\\foo\\') # Relative path on current drive self.assertEqual(str(P(drive).absolute()), BASE) self.assertEqual(str(P(drive + 'foo').absolute()), os.path.join(BASE, 'foo')) + self.assertEqual(str(P(drive + 'foo\\').absolute()), os.path.join(BASE, 'foo\\')) with 
os_helper.subst_drive(BASE) as other_drive: # Set the working directory on the substitute drive @@ -3860,6 +3871,7 @@ def test_absolute(self): # Relative path on another drive self.assertEqual(str(P(other_drive).absolute()), other_cwd) self.assertEqual(str(P(other_drive + 'foo').absolute()), other_cwd + '\\foo') + self.assertEqual(str(P(other_drive + 'foo\\').absolute()), other_cwd + '\\foo\\') def test_glob(self): P = self.cls From 11261172eb026205fd36c12415d6e90a90df8b57 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 8 Dec 2023 17:22:42 +0000 Subject: [PATCH 21/23] Add notes to `match()` and `glob()` docs --- Doc/library/pathlib.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index e6ac294b135931..6c4a7d5b02e859 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -635,6 +635,10 @@ Pure paths provide the following methods and properties: Support for the recursive wildcard "``**``" was added. In previous versions, it acted like the non-recursive wildcard "``*``". + .. versionchanged:: 3.13 + Matching now considers whether the path and *pattern* end with path + separators. + .. method:: PurePath.relative_to(other, walk_up=False) @@ -1076,6 +1080,11 @@ call fails (for example because the path doesn't exist). future Python release, patterns with this ending will match both files and directories. Add a trailing slash to match only directories. + .. versionchanged:: 3.13 + Returns paths with trailing path separators if *pattern* also ends with + a pathname components separator (:data:`~os.sep` or :data:`~os.altsep`). + + .. method:: Path.group(*, follow_symlinks=True) Return the name of the group owning the file. 
:exc:`KeyError` is raised From 305381b32521ae1e5e0702f8b89f3cf12ecdbb3e Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 17 Dec 2023 02:12:47 +0000 Subject: [PATCH 22/23] Undo pickling change --- Lib/pathlib/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 7e01eda916aeb5..84f0f6d48e35d4 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -98,7 +98,12 @@ def __init__(self, *args): def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. - return (self.__class__, tuple(self._raw_paths)) + args = tuple(self._tail) + if self.drive or self.root: + args = (self.drive + self.root,) + args + if self.has_trailing_sep: + args = args + ('.',) + return (self.__class__, args) def __fspath__(self): return str(self) From dfe7aae5884258b3f85d675a3b83cd1b44133bda Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 18 Dec 2023 05:42:16 +0000 Subject: [PATCH 23/23] Undo pickling changes. --- Lib/pathlib/__init__.py | 7 +------ Lib/test/test_pathlib/test_pathlib.py | 18 ++++++++---------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index 84f0f6d48e35d4..0d8a84040d6bc7 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -98,12 +98,7 @@ def __init__(self, *args): def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. 
- args = tuple(self._tail) - if self.drive or self.root: - args = (self.drive + self.root,) + args - if self.has_trailing_sep: - args = args + ('.',) - return (self.__class__, args) + return (self.__class__, self.parts) def __fspath__(self): return str(self) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index a089a3b8919f9f..408a90875d1f14 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -60,16 +60,14 @@ def test_div_nested(self): def test_pickling_common(self): P = self.cls - for pathstr in ('a', 'a/', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c', 'a/b/c/'): - with self.subTest(pathstr=pathstr): - p = P(pathstr) - for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): - dumped = pickle.dumps(p, proto) - pp = pickle.loads(dumped) - self.assertIs(pp.__class__, p.__class__) - self.assertEqual(pp, p) - self.assertEqual(hash(pp), hash(p)) - self.assertEqual(str(pp), str(p)) + p = P('/a/b') + for proto in range(0, pickle.HIGHEST_PROTOCOL + 1): + dumped = pickle.dumps(p, proto) + pp = pickle.loads(dumped) + self.assertIs(pp.__class__, p.__class__) + self.assertEqual(pp, p) + self.assertEqual(hash(pp), hash(p)) + self.assertEqual(str(pp), str(p)) def test_fspath_common(self): P = self.cls