From 873fcc6137a6131a9bd5e30ce91a09f20a06b0f9 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 19 Jun 2024 03:20:19 +0100 Subject: [PATCH 1/4] GH-73991: Add `pathlib.Path.copytree()` Add `pathlib.Path.copytree()` method, which recursively copies one directory to another. This differs from `shutil.copytree()` in the following respects: 1. Our method has a *follow_symlinks* argument, whereas shutil's has a *symlinks* argument with an inverted meaning. 2. Our method lacks something like a *copy_function* argument. It always uses `Path.copy()` to copy files. 3. Our method lacks something like a *ignore_dangling_symlinks* argument. Instead, users can filter out danging symlinks with *ignore*, or ignore exceptions with *on_error* 4. Our *ignore* argument is a callable that accepts a single path object, whereas shutil's accepts a path and a list of child filenames. 5. We add an *on_error* argument, which is a callable that accepts an `OSError` instance. (`Path.walk()` also accepts such a callable). --- Doc/library/pathlib.rst | 27 +++ Doc/whatsnew/3.14.rst | 3 + Lib/pathlib/_abc.py | 30 ++++ Lib/test/test_pathlib/test_pathlib.py | 19 +- Lib/test/test_pathlib/test_pathlib_abc.py | 164 ++++++++++++++++++ ...4-06-19-03-09-11.gh-issue-73991.lU_jK9.rst | 1 + 6 files changed, 242 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-19-03-09-11.gh-issue-73991.lU_jK9.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 5bfcad0dadff6a..b6c007a1fc1d53 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1455,6 +1455,33 @@ Copying, renaming and deleting .. versionadded:: 3.14 +.. method:: Path.copytree(target, *, follow_symlinks=True, dirs_exist_ok=False, + ignore=None, on_error=None) + + Recursively copy this directory tree to the given destination. + + If a symlink is encountered in the source tree, and *follow_symlinks* is + true (the default), the symlink's target is copied. Otherwise, the symlink + is recreated in the destination tree. + + If the destination is an existing directory and *dirs_exist_ok* is false + (the default), a :exc:`FileExistsError` is raised. Otherwise, the copying + operation will continue if it encounters existing directories, and files + within the destination tree will be overwritten by corresponding files from + the source tree. + + If *ignore* is given, it should be a callable accepting one argument: a + file or directory path within the source tree. The callable may return true + to suppress copying of the path. + + If *on_error* is given, it should be a callable accepting one argument: an + instance of :exc:`OSError`. The callable may re-raise the exception or do + nothing, in which case the copying operation continues. If *on_error* isn't + given, exceptions are propagated to the caller. + + .. versionadded:: 3.14 + + .. method:: Path.rename(target) Rename this file or directory to the given *target*, and return a new diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 804d39ab64646d..16a63b0c00fa7f 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -106,6 +106,9 @@ pathlib * Add :meth:`pathlib.Path.copy`, which copies the content of one file to another, like :func:`shutil.copyfile`. (Contributed by Barney Gale in :gh:`73991`.) +* Add :meth:`pathlib.Path.copytree`, which copies one directory tree to + another. + (Contributed by Barney Gale in :gh:`73991`.) symtable -------- diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index f1f350a196091a..71973913921169 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -815,6 +815,36 @@ def copy(self, target, follow_symlinks=True): else: raise + def copytree(self, target, *, follow_symlinks=True, dirs_exist_ok=False, + ignore=None, on_error=None): + """ + Recursively copy this directory tree to the given destination. + """ + if not isinstance(target, PathBase): + target = self.with_segments(target) + if on_error is None: + def on_error(err): + raise err + stack = [(self, target)] + while stack: + source_dir, target_dir = stack.pop() + try: + sources = source_dir.iterdir() + target_dir.mkdir(exist_ok=dirs_exist_ok) + for source in sources: + if ignore and ignore(source): + continue + try: + if source.is_dir(follow_symlinks=follow_symlinks): + stack.append((source, target_dir.joinpath(source.name))) + else: + source.copy(target_dir.joinpath(source.name), + follow_symlinks=follow_symlinks) + except OSError as err: + on_error(err) + except OSError as err: + on_error(err) + def rename(self, target): """ Rename this path to the target path. diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 89af1f7581764f..2db33be94c6270 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -513,14 +513,17 @@ def setUp(self): os.chmod(self.parser.join(self.base, 'dirE'), 0) def tearDown(self): - os.chmod(self.parser.join(self.base, 'dirE'), 0o777) + try: + os.chmod(self.parser.join(self.base, 'dirE'), 0o777) + except FileNotFoundError: + pass os_helper.rmtree(self.base) def tempdir(self): d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', dir=os.getcwd())) self.addCleanup(os_helper.rmtree, d) - return d + return self.cls(d) def test_matches_pathbase_api(self): our_names = {name for name in dir(self.cls) if name[0] != '_'} @@ -653,6 +656,18 @@ def test_open_unbuffered(self): self.assertIsInstance(f, io.RawIOBase) self.assertEqual(f.read().strip(), b"this is file A") + def test_copytree_no_read_permission(self): + base = self.cls(self.base) + source = base / 'dirE' + target = base / 'copyE' + self.assertRaises(PermissionError, source.copytree, target) + self.assertFalse(target.exists()) + errors = [] + source.copytree(target, on_error=errors.append) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], PermissionError) + self.assertFalse(target.exists()) + def test_resolve_nonexist_relative_issue38671(self): p = self.cls('non', 'exist') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index cd629c01871165..b79070f98cb0b8 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1822,6 +1822,170 @@ def test_copy_empty(self): self.assertTrue(target.exists()) self.assertEqual(target.read_bytes(), b'') + def test_copytree_simple(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'copyC' + source.copytree(target) + self.assertTrue(target.is_dir()) + self.assertTrue(target.joinpath('dirD').is_dir()) + self.assertTrue(target.joinpath('dirD', 'fileD').is_file()) + self.assertEqual(target.joinpath('dirD', 'fileD').read_text(), + "this is file D\n") + self.assertTrue(target.joinpath('fileC').is_file()) + self.assertTrue(target.joinpath('fileC').read_text(), + "this is file C\n") + + def test_copytree_complex(self): + def ordered_walk(path): + for dirpath, dirnames, filenames in path.walk(follow_symlinks=True): + dirnames.sort() + filenames.sort() + yield dirpath, dirnames, filenames + source = self.cls(self.base) + target = self.tempdir() / 'target' + + # Special cases tested elsehwere + source.joinpath('dirE').rmdir() + if self.can_symlink: + source.joinpath('brokenLink').unlink() + source.joinpath('brokenLinkLoop').unlink() + source.joinpath('dirB', 'linkD').unlink() + + # Perform the copy + source.copytree(target) + + # Compare the source and target trees + source_walk = ordered_walk(source) + target_walk = ordered_walk(target) + for source_item, target_item in zip(source_walk, target_walk, strict=True): + self.assertEqual(source_item[0].relative_to(source), + target_item[0].relative_to(target)) # dirpath + self.assertEqual(source_item[1], target_item[1]) # dirnames + self.assertEqual(source_item[2], target_item[2]) # filenames + + def test_copytree_complex_follow_symlinks_false(self): + def ordered_walk(path): + for dirpath, dirnames, filenames in path.walk(follow_symlinks=False): + dirnames.sort() + filenames.sort() + yield dirpath, dirnames, filenames + source = self.cls(self.base) + target = self.tempdir() / 'target' + + # Special cases tested elsewhere + source.joinpath('dirE').rmdir() + + # Perform the copy + source.copytree(target, follow_symlinks=False) + + # Compare the source and target trees + source_walk = ordered_walk(source) + target_walk = ordered_walk(target) + for source_item, target_item in zip(source_walk, target_walk, strict=True): + self.assertEqual(source_item[0].relative_to(source), + target_item[0].relative_to(target)) # dirpath + self.assertEqual(source_item[1], target_item[1]) # dirnames + self.assertEqual(source_item[2], target_item[2]) # filenames + + def test_copytree_to_existing_directory(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'copyC' + target.mkdir() + target.joinpath('dirD').mkdir() + self.assertRaises(FileExistsError, source.copytree, target) + + def test_copytree_to_existing_directory_dirs_exist_ok(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'copyC' + target.mkdir() + target.joinpath('dirD').mkdir() + source.copytree(target, dirs_exist_ok=True) + self.assertTrue(target.is_dir()) + self.assertTrue(target.joinpath('dirD').is_dir()) + self.assertTrue(target.joinpath('dirD', 'fileD').is_file()) + self.assertEqual(target.joinpath('dirD', 'fileD').read_text(), + "this is file D\n") + self.assertTrue(target.joinpath('fileC').is_file()) + self.assertTrue(target.joinpath('fileC').read_text(), + "this is file C\n") + + def test_copytree_file(self): + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'copyA' + self.assertRaises(NotADirectoryError, source.copytree, target) + + def test_copytree_file_on_error(self): + base = self.cls(self.base) + source = base / 'fileA' + target = base / 'copyA' + errors = [] + source.copytree(target, on_error=errors.append) + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0], NotADirectoryError) + + def test_copytree_ignore_false(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'copyC' + ignores = [] + def ignore_false(path): + ignores.append(path) + return False + source.copytree(target, ignore=ignore_false) + self.assertEqual(set(ignores), { + source / 'dirD', + source / 'dirD' / 'fileD', + source / 'fileC', + source / 'novel.txt', + }) + self.assertTrue(target.is_dir()) + self.assertTrue(target.joinpath('dirD').is_dir()) + self.assertTrue(target.joinpath('dirD', 'fileD').is_file()) + self.assertEqual(target.joinpath('dirD', 'fileD').read_text(), + "this is file D\n") + self.assertTrue(target.joinpath('fileC').is_file()) + self.assertTrue(target.joinpath('fileC').read_text(), + "this is file C\n") + + def test_copytree_ignore_true(self): + base = self.cls(self.base) + source = base / 'dirC' + target = base / 'copyC' + ignores = [] + def ignore_true(path): + ignores.append(path) + return True + source.copytree(target, ignore=ignore_true) + self.assertEqual(set(ignores), { + source / 'dirD', + source / 'fileC', + source / 'novel.txt', + }) + self.assertTrue(target.is_dir()) + self.assertFalse(target.joinpath('dirD').exists()) + self.assertFalse(target.joinpath('fileC').exists()) + self.assertFalse(target.joinpath('novel.txt').exists()) + + @needs_symlinks + def test_copytree_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'source' + target = base / 'target' + + source.mkdir() + source.joinpath('link').symlink_to('nonexistent') + + self.assertRaises(FileNotFoundError, source.copytree, target) + + target2 = base / 'target2' + source.copytree(target2, follow_symlinks=False) + self.assertTrue(target2.joinpath('link').is_symlink()) + self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) + def test_iterdir(self): P = self.cls p = P(self.base) diff --git a/Misc/NEWS.d/next/Library/2024-06-19-03-09-11.gh-issue-73991.lU_jK9.rst b/Misc/NEWS.d/next/Library/2024-06-19-03-09-11.gh-issue-73991.lU_jK9.rst new file mode 100644 index 00000000000000..60a1b68d5bb1a8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-19-03-09-11.gh-issue-73991.lU_jK9.rst @@ -0,0 +1 @@ +Add :meth:`pathlib.Path.copytree`, which recursively copies a directory. From ab5fa4c6c885ddb77caddd4955ad7ff61318adb4 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 22 Jun 2024 02:05:27 +0100 Subject: [PATCH 2/4] Apply suggestions from code review Co-authored-by: Nice Zombies --- Doc/library/pathlib.rst | 2 +- Lib/test/test_pathlib/test_pathlib.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index b6c007a1fc1d53..e585bcef915fbf 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1455,7 +1455,7 @@ Copying, renaming and deleting .. versionadded:: 3.14 -.. method:: Path.copytree(target, *, follow_symlinks=True, dirs_exist_ok=False, +.. method:: Path.copytree(target, *, follow_symlinks=True, dirs_exist_ok=False, \ ignore=None, on_error=None) Recursively copy this directory tree to the given destination. diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2db33be94c6270..e6c074a3e87425 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -656,6 +656,7 @@ def test_open_unbuffered(self): self.assertIsInstance(f, io.RawIOBase) self.assertEqual(f.read().strip(), b"this is file A") + @unittest.skipIf(sys.platform == "win32", "directories are always readable on Windows") def test_copytree_no_read_permission(self): base = self.cls(self.base) source = base / 'dirE' From f9bfdc98e76fff30ab3c2b94da0ce8e8d7e8ca10 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 23 Jun 2024 17:37:44 +0100 Subject: [PATCH 3/4] Update Lib/test/test_pathlib/test_pathlib.py Co-authored-by: Nice Zombies --- Lib/test/test_pathlib/test_pathlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index e6c074a3e87425..8862df19fec712 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -656,7 +656,7 @@ def test_open_unbuffered(self): self.assertIsInstance(f, io.RawIOBase) self.assertEqual(f.read().strip(), b"this is file A") - @unittest.skipIf(sys.platform == "win32", "directories are always readable on Windows") + @unittest.skipIf(sys.platform == "win32" or sys.platform == "wasi", "directories are always readable on Windows and WASI") def test_copytree_no_read_permission(self): base = self.cls(self.base) source = base / 'dirE' From 8c453bbcc320f4352a080af5de4aaa9590c6f5ce Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 23 Jun 2024 17:52:16 +0100 Subject: [PATCH 4/4] Improve tests --- Lib/test/test_pathlib/test_pathlib.py | 7 +-- Lib/test/test_pathlib/test_pathlib_abc.py | 57 ++++++++++------------- 2 files changed, 27 insertions(+), 37 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 8862df19fec712..6b5e90fbcf718e 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -513,17 +513,14 @@ def setUp(self): os.chmod(self.parser.join(self.base, 'dirE'), 0) def tearDown(self): - try: - os.chmod(self.parser.join(self.base, 'dirE'), 0o777) - except FileNotFoundError: - pass + os.chmod(self.parser.join(self.base, 'dirE'), 0o777) os_helper.rmtree(self.base) def tempdir(self): d = os_helper._longpath(tempfile.mkdtemp(suffix='-dirD', dir=os.getcwd())) self.addCleanup(os_helper.rmtree, d) - return self.cls(d) + return d def test_matches_pathbase_api(self): our_names = {name for name in dir(self.cls) if name[0] != '_'} diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index b79070f98cb0b8..ad692e872ede0b 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1836,24 +1836,23 @@ def test_copytree_simple(self): self.assertTrue(target.joinpath('fileC').read_text(), "this is file C\n") - def test_copytree_complex(self): + def test_copytree_complex(self, follow_symlinks=True): def ordered_walk(path): - for dirpath, dirnames, filenames in path.walk(follow_symlinks=True): + for dirpath, dirnames, filenames in path.walk(follow_symlinks=follow_symlinks): dirnames.sort() filenames.sort() yield dirpath, dirnames, filenames - source = self.cls(self.base) - target = self.tempdir() / 'target' + base = self.cls(self.base) + source = base / 'dirC' - # Special cases tested elsehwere - source.joinpath('dirE').rmdir() if self.can_symlink: - source.joinpath('brokenLink').unlink() - source.joinpath('brokenLinkLoop').unlink() - source.joinpath('dirB', 'linkD').unlink() + # Add some symlinks + source.joinpath('linkC').symlink_to('fileC') + source.joinpath('linkD').symlink_to('dirD') # Perform the copy - source.copytree(target) + target = base / 'copyC' + source.copytree(target, follow_symlinks=follow_symlinks) # Compare the source and target trees source_walk = ordered_walk(source) @@ -1863,30 +1862,24 @@ def ordered_walk(path): target_item[0].relative_to(target)) # dirpath self.assertEqual(source_item[1], target_item[1]) # dirnames self.assertEqual(source_item[2], target_item[2]) # filenames + # Compare files and symlinks + for filename in source_item[2]: + source_file = source_item[0].joinpath(filename) + target_file = target_item[0].joinpath(filename) + if follow_symlinks or not source_file.is_symlink(): + # Regular file. + self.assertEqual(source_file.read_bytes(), target_file.read_bytes()) + elif source_file.is_dir(): + # Symlink to directory. + self.assertTrue(target_file.is_dir()) + self.assertEqual(source_file.readlink(), target_file.readlink()) + else: + # Symlink to file. + self.assertEqual(source_file.read_bytes(), target_file.read_bytes()) + self.assertEqual(source_file.readlink(), target_file.readlink()) def test_copytree_complex_follow_symlinks_false(self): - def ordered_walk(path): - for dirpath, dirnames, filenames in path.walk(follow_symlinks=False): - dirnames.sort() - filenames.sort() - yield dirpath, dirnames, filenames - source = self.cls(self.base) - target = self.tempdir() / 'target' - - # Special cases tested elsewhere - source.joinpath('dirE').rmdir() - - # Perform the copy - source.copytree(target, follow_symlinks=False) - - # Compare the source and target trees - source_walk = ordered_walk(source) - target_walk = ordered_walk(target) - for source_item, target_item in zip(source_walk, target_walk, strict=True): - self.assertEqual(source_item[0].relative_to(source), - target_item[0].relative_to(target)) # dirpath - self.assertEqual(source_item[1], target_item[1]) # dirnames - self.assertEqual(source_item[2], target_item[2]) # filenames + self.test_copytree_complex(follow_symlinks=False) def test_copytree_to_existing_directory(self): base = self.cls(self.base)