From e2b4c6d7996c1c406687bd9a8f000ac414b6ed3a Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 12 Aug 2024 01:10:38 +0100 Subject: [PATCH 1/9] GH-73991: Disallow copying directory into itself via `pathlib.Path.copy()` --- Lib/pathlib/_abc.py | 38 +++++++++++++++++++---- Lib/test/test_pathlib/test_pathlib_abc.py | 25 +++++++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 500846d19cf811..e835351ca2a10e 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -11,6 +11,7 @@ resemble pathlib's PurePath and Path respectively. """ +import errno import functools import operator import posixpath @@ -564,14 +565,33 @@ def samefile(self, other_path): return (st.st_ino == other_st.st_ino and st.st_dev == other_st.st_dev) - def _samefile_safe(self, other_path): + def _ensure_different_file(self, other_path): """ - Like samefile(), but returns False rather than raising OSError. + Raise OSError(EINVAL) if both paths refer to the same file. """ try: - return self.samefile(other_path) + if not self.samefile(other_path): + return except (OSError, ValueError): - return False + return + err = OSError(errno.EINVAL, "Source and target are the same file") + err.filename = str(self) + err.filename2 = str(other_path) + raise err + + def _ensure_distinct_path(self, other_path): + """ + Raise OSError(EINVAL) if the other path is within this path. + """ + if self == other_path: + err = OSError(errno.EINVAL, "Source and target are the same path") + elif self in other_path.parents: + err = OSError(errno.EINVAL, "Source path is a parent of target path") + else: + return + err.filename = str(self) + err.filename2 = str(other_path) + raise err def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): @@ -826,8 +846,7 @@ def _copy_file(self, target): """ Copy the contents of this file to the given target. """ - if self._samefile_safe(target): - raise OSError(f"{self!r} and {target!r} are the same file") + self._ensure_different_file(target) with self.open('rb') as source_f: try: with target.open('wb') as target_f: @@ -847,6 +866,13 @@ def copy(self, target, *, follow_symlinks=True, dirs_exist_ok=False, """ if not isinstance(target, PathBase): target = self.with_segments(target) + try: + self._ensure_distinct_path(target) + except OSError as err: + if on_error is None: + raise + on_error(err) + return stack = [(self, target)] while stack: src, dst = stack.pop() diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 629a1d4bdeb4de..ea752dc7134e97 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1823,6 +1823,12 @@ def test_copy_file_empty(self): self.assertTrue(target.exists()) self.assertEqual(target.read_bytes(), b'') + def test_copy_file_to_itself(self): + base = self.cls(self.base) + source = base / 'empty' + source.write_bytes(b'') + self.assertRaises(OSError, source.copy, source) + def test_copy_dir_simple(self): base = self.cls(self.base) source = base / 'dirC' @@ -1909,6 +1915,25 @@ def test_copy_dir_to_existing_directory_dirs_exist_ok(self): self.assertTrue(target.joinpath('fileC').read_text(), "this is file C\n") + def test_copy_dir_to_itself(self): + base = self.cls(self.base) + source = base / 'dirC' + self.assertRaises(OSError, source.copy, source) + + def test_copy_dir_to_itself_on_error(self): + base = self.cls(self.base) + source = base / 'dirC' + errors = [] + source.copy(source, on_error=errors.append) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], OSError) + + def test_copy_dir_into_itself(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'dirC' / 'dirD' / 'copyC' + self.assertRaises(OSError, source.copy, target) + def test_copy_missing_on_error(self): base = self.cls(self.base) source = base / 'foo' From d646ad23c72cefcc8d78317a730c94351b64787e Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 12 Aug 2024 13:37:12 +0100 Subject: [PATCH 2/9] Tweak import --- Lib/pathlib/_abc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e835351ca2a10e..262f323b442889 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -11,10 +11,10 @@ resemble pathlib's PurePath and Path respectively. """ -import errno import functools import operator import posixpath +from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from pathlib._os import copyfileobj @@ -574,7 +574,7 @@ def _ensure_different_file(self, other_path): return except (OSError, ValueError): return - err = OSError(errno.EINVAL, "Source and target are the same file") + err = OSError(EINVAL, "Source and target are the same file") err.filename = str(self) err.filename2 = str(other_path) raise err @@ -584,9 +584,9 @@ def _ensure_distinct_path(self, other_path): Raise OSError(EINVAL) if the other path is within this path. """ if self == other_path: - err = OSError(errno.EINVAL, "Source and target are the same path") + err = OSError(EINVAL, "Source and target are the same path") elif self in other_path.parents: - err = OSError(errno.EINVAL, "Source path is a parent of target path") + err = OSError(EINVAL, "Source path is a parent of target path") else: return err.filename = str(self) From c5a79d043e7a69ec23a637c66e70ece8ccc091f6 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 12 Aug 2024 13:53:11 +0100 Subject: [PATCH 3/9] Expand tests --- Lib/test/test_pathlib/test_pathlib_abc.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index ea752dc7134e97..49e444cc550a82 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1758,6 +1758,12 @@ def test_copy_symlink_follow_symlinks_false(self): self.assertTrue(target.is_symlink()) self.assertEqual(source.readlink(), target.readlink()) + @needs_symlinks + def test_copy_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.copy, source) + @needs_symlinks def test_copy_directory_symlink_follow_symlinks_false(self): base = self.cls(self.base) @@ -1769,6 +1775,20 @@ def test_copy_directory_symlink_follow_symlinks_false(self): self.assertTrue(target.is_symlink()) self.assertEqual(source.readlink(), target.readlink()) + @needs_symlinks + def test_copy_directory_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.copy, source) + + @needs_symlinks + def test_copy_directory_symlink_into_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'linkB' / 'copyB' + self.assertRaises(OSError, source.copy, target) + self.assertFalse(target.exists()) + def test_copy_file_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' @@ -1933,6 +1953,7 @@ def test_copy_dir_into_itself(self): source = base / 'dirC' target = base / 'dirC' / 'dirD' / 'copyC' self.assertRaises(OSError, source.copy, target) + self.assertFalse(target.exists()) def test_copy_missing_on_error(self): base = self.cls(self.base) From f58c07a9fc77584b9faf2b0de3ec2d6cdddffd96 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 13 Aug 2024 18:07:55 +0100 Subject: [PATCH 4/9] Add comment to _ensure_distinct_path() --- Lib/pathlib/_abc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 262f323b442889..e2009ee5e3d872 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -583,6 +583,11 @@ def _ensure_distinct_path(self, other_path): """ Raise OSError(EINVAL) if the other path is within this path. """ + # Note: there is no straightforward, foolproof algorithm to determine + # if one directory is within another. A particularly perverse example: + # consider a single network share mounted in one location via NFS, and + # in another location via CIFS. This method simply checks whether the + # other_path is lexically equal to, or within, this path. if self == other_path: err = OSError(EINVAL, "Source and target are the same path") elif self in other_path.parents: From b119e3d37dc197ce2741dbcf6ccc6eb5ded8dcc3 Mon Sep 17 00:00:00 2001 From: barneygale Date: Tue, 13 Aug 2024 18:15:31 +0100 Subject: [PATCH 5/9] Improve wording --- Lib/pathlib/_abc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e2009ee5e3d872..ef3ed7e68fd293 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -584,10 +584,10 @@ def _ensure_distinct_path(self, other_path): Raise OSError(EINVAL) if the other path is within this path. """ # Note: there is no straightforward, foolproof algorithm to determine - # if one directory is within another. A particularly perverse example: - # consider a single network share mounted in one location via NFS, and - # in another location via CIFS. This method simply checks whether the - # other_path is lexically equal to, or within, this path. + # if one directory is within another (a particularly perverse example + # would be a single network share mounted in one location via NFS, and + # in another location via CIFS), so we simply checks whether the + # other path is lexically equal to, or within, this path. if self == other_path: err = OSError(EINVAL, "Source and target are the same path") elif self in other_path.parents: From 32530b704f6b795ccc1dca958fd921e08d7417ed Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 18 Aug 2024 17:03:24 +0100 Subject: [PATCH 6/9] Add tests for symlink-to-other-symlink copies. --- Lib/test/test_pathlib/test_pathlib_abc.py | 65 ++++++++++++++++++++--- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 49e444cc550a82..b781e94c628bd0 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1501,19 +1501,20 @@ def iterdir(self): raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): - path = str(self.resolve()) - if path in self._directories: + path = str(self.parent.resolve() / self.name) + parent = str(self.parent.resolve()) + if path in self._directories or path in self._symlinks: if exist_ok: return else: raise FileExistsError(errno.EEXIST, "File exists", path) try: if self.name: - self._directories[str(self.parent)].add(self.name) + self._directories[parent].add(self.name) self._directories[path] = set() except KeyError: if not parents: - raise FileNotFoundError(errno.ENOENT, "File not found", str(self.parent)) from None + raise FileNotFoundError(errno.ENOENT, "File not found", parent) from None self.parent.mkdir(parents=True, exist_ok=True) self.mkdir(mode, parents=False, exist_ok=exist_ok) @@ -1764,6 +1765,30 @@ def test_copy_symlink_to_itself(self): source = base / 'linkA' self.assertRaises(OSError, source.copy, source) + @needs_symlinks + def test_copy_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + with self.assertRaises(IsADirectoryError): + source.copy(target) + with self.assertRaises(FileExistsError): + source.copy(target, follow_symlinks=False) + + @needs_symlinks + def test_copy_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + with self.assertRaises(IsADirectoryError): + source.copy(target) + with self.assertRaises(FileExistsError): + source.copy(target, follow_symlinks=False) + @needs_symlinks def test_copy_directory_symlink_follow_symlinks_false(self): base = self.cls(self.base) @@ -1789,6 +1814,30 @@ def test_copy_directory_symlink_into_itself(self): self.assertRaises(OSError, source.copy, target) self.assertFalse(target.exists()) + @needs_symlinks + def test_copy_directory_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC') + target.symlink_to(base / 'fileA') + with self.assertRaises(FileExistsError): + source.copy(target) + with self.assertRaises(FileExistsError): + source.copy(target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC' / 'dirD') + target.symlink_to(base / 'dirC') + with self.assertRaises(FileExistsError): + source.copy(target) + with self.assertRaises(FileExistsError): + source.copy(target, follow_symlinks=False) + def test_copy_file_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' @@ -2922,8 +2971,12 @@ def readlink(self): raise FileNotFoundError(errno.ENOENT, "File not found", path) def symlink_to(self, target, target_is_directory=False): - self._directories[str(self.parent)].add(self.name) - self._symlinks[str(self)] = str(target) + path = str(self.parent.resolve() / self.name) + parent = str(self.parent.resolve()) + if path in self._symlinks: + raise FileExistsError(errno.EEXIST, "File exists", path) + self._directories[parent].add(self.name) + self._symlinks[path] = str(target) class DummyPathWithSymlinksTest(DummyPathTest): From a15f248465b90a5992a37067c53d91a6b13daac6 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 18 Aug 2024 17:28:32 +0100 Subject: [PATCH 7/9] Fix windows tests --- Lib/test/test_pathlib/test_pathlib_abc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index b781e94c628bd0..c0359628d75269 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1772,7 +1772,7 @@ def test_copy_symlink_to_existing_symlink(self): target = base / 'copyTarget' source.symlink_to(base / 'fileA') target.symlink_to(base / 'dirC') - with self.assertRaises(IsADirectoryError): + with self.assertRaises(OSError): source.copy(target) with self.assertRaises(FileExistsError): source.copy(target, follow_symlinks=False) @@ -1784,7 +1784,7 @@ def test_copy_symlink_to_existing_directory_symlink(self): target = base / 'copyTarget' source.symlink_to(base / 'fileA') target.symlink_to(base / 'dirC') - with self.assertRaises(IsADirectoryError): + with self.assertRaises(OSError): source.copy(target) with self.assertRaises(FileExistsError): source.copy(target, follow_symlinks=False) From d371b89f8955aafdf55823fa4c0feb7de6b6ef1b Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 18 Aug 2024 18:21:44 +0100 Subject: [PATCH 8/9] More Windows goodness --- Lib/test/test_pathlib/test_pathlib_abc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index c0359628d75269..d5eda793ae23ab 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1774,7 +1774,7 @@ def test_copy_symlink_to_existing_symlink(self): target.symlink_to(base / 'dirC') with self.assertRaises(OSError): source.copy(target) - with self.assertRaises(FileExistsError): + with self.assertRaises(OSError): source.copy(target, follow_symlinks=False) @needs_symlinks @@ -1786,7 +1786,7 @@ def test_copy_symlink_to_existing_directory_symlink(self): target.symlink_to(base / 'dirC') with self.assertRaises(OSError): source.copy(target) - with self.assertRaises(FileExistsError): + with self.assertRaises(OSError): source.copy(target, follow_symlinks=False) @needs_symlinks From 7f5b1abcc58f0c900ff60911ed5f4f07b7576874 Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 23 Aug 2024 19:13:29 +0100 Subject: [PATCH 9/9] Address review feedback --- Lib/test/test_pathlib/test_pathlib_abc.py | 32 ++++++++++------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index d5eda793ae23ab..539b2eba64f0a3 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1772,10 +1772,8 @@ def test_copy_symlink_to_existing_symlink(self): target = base / 'copyTarget' source.symlink_to(base / 'fileA') target.symlink_to(base / 'dirC') - with self.assertRaises(OSError): - source.copy(target) - with self.assertRaises(OSError): - source.copy(target, follow_symlinks=False) + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) @needs_symlinks def test_copy_symlink_to_existing_directory_symlink(self): @@ -1784,10 +1782,8 @@ def test_copy_symlink_to_existing_directory_symlink(self): target = base / 'copyTarget' source.symlink_to(base / 'fileA') target.symlink_to(base / 'dirC') - with self.assertRaises(OSError): - source.copy(target) - with self.assertRaises(OSError): - source.copy(target, follow_symlinks=False) + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) @needs_symlinks def test_copy_directory_symlink_follow_symlinks_false(self): @@ -1805,6 +1801,7 @@ def test_copy_directory_symlink_to_itself(self): base = self.cls(self.base) source = base / 'linkB' self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) @needs_symlinks def test_copy_directory_symlink_into_itself(self): @@ -1812,6 +1809,7 @@ def test_copy_directory_symlink_into_itself(self): source = base / 'linkB' target = base / 'linkB' / 'copyB' self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) self.assertFalse(target.exists()) @needs_symlinks @@ -1821,10 +1819,8 @@ def test_copy_directory_symlink_to_existing_symlink(self): target = base / 'copyTarget' source.symlink_to(base / 'dirC') target.symlink_to(base / 'fileA') - with self.assertRaises(FileExistsError): - source.copy(target) - with self.assertRaises(FileExistsError): - source.copy(target, follow_symlinks=False) + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) @needs_symlinks def test_copy_directory_symlink_to_existing_directory_symlink(self): @@ -1833,10 +1829,8 @@ def test_copy_directory_symlink_to_existing_directory_symlink(self): target = base / 'copyTarget' source.symlink_to(base / 'dirC' / 'dirD') target.symlink_to(base / 'dirC') - with self.assertRaises(FileExistsError): - source.copy(target) - with self.assertRaises(FileExistsError): - source.copy(target, follow_symlinks=False) + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) def test_copy_file_to_existing_file(self): base = self.cls(self.base) @@ -1851,8 +1845,7 @@ def test_copy_file_to_existing_directory(self): base = self.cls(self.base) source = base / 'fileA' target = base / 'dirA' - with self.assertRaises(OSError): - source.copy(target) + self.assertRaises(OSError, source.copy, target) @needs_symlinks def test_copy_file_to_existing_symlink(self): @@ -1897,6 +1890,7 @@ def test_copy_file_to_itself(self): source = base / 'empty' source.write_bytes(b'') self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) def test_copy_dir_simple(self): base = self.cls(self.base) @@ -1988,6 +1982,7 @@ def test_copy_dir_to_itself(self): base = self.cls(self.base) source = base / 'dirC' self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, source, follow_symlinks=False) def test_copy_dir_to_itself_on_error(self): base = self.cls(self.base) @@ -2002,6 +1997,7 @@ def test_copy_dir_into_itself(self): source = base / 'dirC' target = base / 'dirC' / 'dirD' / 'copyC' self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) self.assertFalse(target.exists()) def test_copy_missing_on_error(self):