From 6469bad4f1bdcd25597192c4475b418f36f1a3a5 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 11:25:00 -0400 Subject: [PATCH 01/14] Don't unconditionally create destination directory We will already create the parent directories for any files that we want to install, so making sure the destination exists should not be required. --- src/pip/_internal/operations/install/wheel.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 87c94918c24..593e12bc00d 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -469,8 +469,6 @@ def clobber( filter=None # type: Optional[Callable[[text_type], bool]] ): # type: (...) -> None - ensure_dir(dest) # common for the 'include' path - for dir, subdirs, files in os.walk(source): basedir = dir[len(source):].lstrip(os.path.sep) destdir = os.path.join(dest, basedir) From 59bee09d26e2a2d0cfab951d755a3c94204f8d20 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Fri, 3 Jul 2020 12:39:49 -0400 Subject: [PATCH 02/14] Derive parent directory from destination path This simplifies the relationship between the loop body and the "get files" section at the top of the loop, making it easier to refactor. --- src/pip/_internal/operations/install/wheel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 593e12bc00d..f12db919ea4 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -471,7 +471,6 @@ def clobber( # type: (...) -> None for dir, subdirs, files in os.walk(source): basedir = dir[len(source):].lstrip(os.path.sep) - destdir = os.path.join(dest, basedir) if is_base and basedir == '': subdirs[:] = [s for s in subdirs if not s.endswith('.data')] for f in files: @@ -483,7 +482,8 @@ def clobber( # directory creation is lazy and after the file filtering above # to ensure we don't install empty dirs; empty dirs can't be # uninstalled. - ensure_dir(destdir) + parent_dir = os.path.dirname(destfile) + ensure_dir(parent_dir) # copyfile (called below) truncates the destination if it # exists and then writes the new contents. This is fine in most From d81aa07ea676e63d1f40717f48709913604f9806 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Fri, 3 Jul 2020 14:13:51 -0400 Subject: [PATCH 03/14] Separate file path calculation from file operations in clobber Separating the concerns of "get files to process" and "operate on the files" enforces that there are no dependencies between these parts of `clobber` and will let us extract the "get files to process" out of `clobber` and customize it for each use case. --- src/pip/_internal/operations/install/wheel.py | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index f12db919ea4..bb95c1967a3 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -469,16 +469,24 @@ def clobber( filter=None # type: Optional[Callable[[text_type], bool]] ): # type: (...) -> None - for dir, subdirs, files in os.walk(source): - basedir = dir[len(source):].lstrip(os.path.sep) - if is_base and basedir == '': - subdirs[:] = [s for s in subdirs if not s.endswith('.data')] - for f in files: - # Skip unwanted files - if filter and filter(f): - continue - srcfile = os.path.join(dir, f) - destfile = os.path.join(dest, basedir, f) + def files_to_process(): + # type: () -> Iterable[Tuple[text_type, text_type]] + for dir, subdirs, files in os.walk(source): + basedir = dir[len(source):].lstrip(os.path.sep) + if is_base and basedir == '': + subdirs[:] = [ + s for s in subdirs if not s.endswith('.data') + ] + for f in files: + # Skip unwanted files + if filter and filter(f): + continue + srcfile = os.path.join(dir, f) + destfile = os.path.join(dest, basedir, f) + yield srcfile, destfile + + if True: # this just preserves indentation for a cleaner commit diff + for srcfile, destfile in files_to_process(): # directory creation is lazy and after the file filtering above # to ensure we don't install empty dirs; empty dirs can't be # uninstalled. From 3da26823788b41847da686e74cc8ed165efdfdec Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Fri, 3 Jul 2020 14:15:46 -0400 Subject: [PATCH 04/14] Dedent unconditional block That just made the previous diff cleaner. --- src/pip/_internal/operations/install/wheel.py | 87 +++++++++---------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index bb95c1967a3..5cba3cbb26c 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -485,51 +485,50 @@ def files_to_process(): destfile = os.path.join(dest, basedir, f) yield srcfile, destfile - if True: # this just preserves indentation for a cleaner commit diff - for srcfile, destfile in files_to_process(): - # directory creation is lazy and after the file filtering above - # to ensure we don't install empty dirs; empty dirs can't be - # uninstalled. - parent_dir = os.path.dirname(destfile) - ensure_dir(parent_dir) - - # copyfile (called below) truncates the destination if it - # exists and then writes the new contents. This is fine in most - # cases, but can cause a segfault if pip has loaded a shared - # object (e.g. from pyopenssl through its vendored urllib3) - # Since the shared object is mmap'd an attempt to call a - # symbol in it will then cause a segfault. Unlinking the file - # allows writing of new contents while allowing the process to - # continue to use the old copy. - if os.path.exists(destfile): - os.unlink(destfile) - - # We use copyfile (not move, copy, or copy2) to be extra sure - # that we are not moving directories over (copyfile fails for - # directories) as well as to ensure that we are not copying - # over any metadata because we want more control over what - # metadata we actually copy over. - shutil.copyfile(srcfile, destfile) - - # Copy over the metadata for the file, currently this only - # includes the atime and mtime. + for srcfile, destfile in files_to_process(): + # directory creation is lazy and after the file filtering above + # to ensure we don't install empty dirs; empty dirs can't be + # uninstalled. + parent_dir = os.path.dirname(destfile) + ensure_dir(parent_dir) + + # copyfile (called below) truncates the destination if it + # exists and then writes the new contents. This is fine in most + # cases, but can cause a segfault if pip has loaded a shared + # object (e.g. from pyopenssl through its vendored urllib3) + # Since the shared object is mmap'd an attempt to call a + # symbol in it will then cause a segfault. Unlinking the file + # allows writing of new contents while allowing the process to + # continue to use the old copy. + if os.path.exists(destfile): + os.unlink(destfile) + + # We use copyfile (not move, copy, or copy2) to be extra sure + # that we are not moving directories over (copyfile fails for + # directories) as well as to ensure that we are not copying + # over any metadata because we want more control over what + # metadata we actually copy over. + shutil.copyfile(srcfile, destfile) + + # Copy over the metadata for the file, currently this only + # includes the atime and mtime. + st = os.stat(srcfile) + if hasattr(os, "utime"): + os.utime(destfile, (st.st_atime, st.st_mtime)) + + # If our file is executable, then make our destination file + # executable. + if os.access(srcfile, os.X_OK): st = os.stat(srcfile) - if hasattr(os, "utime"): - os.utime(destfile, (st.st_atime, st.st_mtime)) - - # If our file is executable, then make our destination file - # executable. - if os.access(srcfile, os.X_OK): - st = os.stat(srcfile) - permissions = ( - st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH - ) - os.chmod(destfile, permissions) - - changed = False - if fixer: - changed = fixer(destfile) - record_installed(srcfile, destfile, changed) + permissions = ( + st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + ) + os.chmod(destfile, permissions) + + changed = False + if fixer: + changed = fixer(destfile) + record_installed(srcfile, destfile, changed) clobber( ensure_text(source, encoding=sys.getfilesystemencoding()), From 8b53d20d74a43bdbaac365e53af58dec8e1028f9 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:09:25 -0400 Subject: [PATCH 05/14] Add File class to represent a file to install This makes the interface between the file-generating function and the rest of clobber easier to understand, and will act as a type we can pass around and wrap to reduce the caller-specific processing we currently do. --- src/pip/_internal/operations/install/wheel.py | 35 +++++++++++-------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 5cba3cbb26c..2f1fdc9daa4 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -388,6 +388,13 @@ def get_console_script_specs(console): return scripts_to_generate +class File(object): + def __init__(self, src_path, dest_path): + # type: (text_type, text_type) -> None + self.src_path = src_path + self.dest_path = dest_path + + class MissingCallableSuffix(Exception): pass @@ -470,7 +477,7 @@ def clobber( ): # type: (...) -> None def files_to_process(): - # type: () -> Iterable[Tuple[text_type, text_type]] + # type: () -> Iterable[File] for dir, subdirs, files in os.walk(source): basedir = dir[len(source):].lstrip(os.path.sep) if is_base and basedir == '': @@ -483,13 +490,13 @@ def files_to_process(): continue srcfile = os.path.join(dir, f) destfile = os.path.join(dest, basedir, f) - yield srcfile, destfile + yield File(srcfile, destfile) - for srcfile, destfile in files_to_process(): + for f in files_to_process(): # directory creation is lazy and after the file filtering above # to ensure we don't install empty dirs; empty dirs can't be # uninstalled. - parent_dir = os.path.dirname(destfile) + parent_dir = os.path.dirname(f.dest_path) ensure_dir(parent_dir) # copyfile (called below) truncates the destination if it @@ -500,35 +507,35 @@ def files_to_process(): # symbol in it will then cause a segfault. Unlinking the file # allows writing of new contents while allowing the process to # continue to use the old copy. - if os.path.exists(destfile): - os.unlink(destfile) + if os.path.exists(f.dest_path): + os.unlink(f.dest_path) # We use copyfile (not move, copy, or copy2) to be extra sure # that we are not moving directories over (copyfile fails for # directories) as well as to ensure that we are not copying # over any metadata because we want more control over what # metadata we actually copy over. - shutil.copyfile(srcfile, destfile) + shutil.copyfile(f.src_path, f.dest_path) # Copy over the metadata for the file, currently this only # includes the atime and mtime. - st = os.stat(srcfile) + st = os.stat(f.src_path) if hasattr(os, "utime"): - os.utime(destfile, (st.st_atime, st.st_mtime)) + os.utime(f.dest_path, (st.st_atime, st.st_mtime)) # If our file is executable, then make our destination file # executable. - if os.access(srcfile, os.X_OK): - st = os.stat(srcfile) + if os.access(f.src_path, os.X_OK): + st = os.stat(f.src_path) permissions = ( st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH ) - os.chmod(destfile, permissions) + os.chmod(f.dest_path, permissions) changed = False if fixer: - changed = fixer(destfile) - record_installed(srcfile, destfile, changed) + changed = fixer(f.dest_path) + record_installed(f.src_path, f.dest_path, changed) clobber( ensure_text(source, encoding=sys.getfilesystemencoding()), From fa4683e6e6b778c81a17cd922ec3fab0f2c6b2a6 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:14:14 -0400 Subject: [PATCH 06/14] Extract file-to-install generation outside clobber Separating generating the files from operating on them will let us be more explicit about what features of extracted files we depend on, and clean up some of our logic by making them less caller-specific. --- src/pip/_internal/operations/install/wheel.py | 51 ++++++++++--------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 2f1fdc9daa4..1e304419a84 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -468,31 +468,33 @@ def record_installed(srcfile, destfile, modified=False): if modified: changed.add(_fs_to_record_path(destfile)) + def files_to_process( + source, # type: text_type + dest, # type: text_type + is_base, # type: bool + filter=None, # type: Optional[Callable[[text_type], bool]] + ): + # type: (...) -> Iterable[File] + for dir, subdirs, files in os.walk(source): + basedir = dir[len(source):].lstrip(os.path.sep) + if is_base and basedir == '': + subdirs[:] = [ + s for s in subdirs if not s.endswith('.data') + ] + for f in files: + # Skip unwanted files + if filter and filter(f): + continue + srcfile = os.path.join(dir, f) + destfile = os.path.join(dest, basedir, f) + yield File(srcfile, destfile) + def clobber( - source, # type: text_type - dest, # type: text_type - is_base, # type: bool + files, # type: Iterator[File] fixer=None, # type: Optional[Callable[[text_type], Any]] - filter=None # type: Optional[Callable[[text_type], bool]] ): # type: (...) -> None - def files_to_process(): - # type: () -> Iterable[File] - for dir, subdirs, files in os.walk(source): - basedir = dir[len(source):].lstrip(os.path.sep) - if is_base and basedir == '': - subdirs[:] = [ - s for s in subdirs if not s.endswith('.data') - ] - for f in files: - # Skip unwanted files - if filter and filter(f): - continue - srcfile = os.path.join(dir, f) - destfile = os.path.join(dest, basedir, f) - yield File(srcfile, destfile) - - for f in files_to_process(): + for f in files: # directory creation is lazy and after the file filtering above # to ensure we don't install empty dirs; empty dirs can't be # uninstalled. @@ -537,11 +539,12 @@ def files_to_process(): changed = fixer(f.dest_path) record_installed(f.src_path, f.dest_path, changed) - clobber( + root_scheme_files = files_to_process( ensure_text(source, encoding=sys.getfilesystemencoding()), ensure_text(lib_dir, encoding=sys.getfilesystemencoding()), True, ) + clobber(root_scheme_files) # Get the defined entry points distribution = pkg_resources_distribution_for_wheel( @@ -578,15 +581,15 @@ def is_entrypoint_wrapper(name): filter = is_entrypoint_wrapper full_datadir_path = os.path.join(wheeldir, datadir, subdir) dest = getattr(scheme, subdir) - clobber( + data_scheme_files = files_to_process( ensure_text( full_datadir_path, encoding=sys.getfilesystemencoding() ), ensure_text(dest, encoding=sys.getfilesystemencoding()), False, - fixer=fixer, filter=filter, ) + clobber(data_scheme_files, fixer=fixer) def pyc_source_file_paths(): # type: () -> Iterator[text_type] From 2839e4a932dbbd261120a49f344b21ed4ec9df17 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:21:16 -0400 Subject: [PATCH 07/14] Extract file-saving logic into File class This will let us refactor the existing logic as a composition of behaviors, so later changes to read from a zip file directly will be less impactful. --- src/pip/_internal/operations/install/wheel.py | 81 ++++++++++--------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 1e304419a84..61818d6eab9 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -394,6 +394,47 @@ def __init__(self, src_path, dest_path): self.src_path = src_path self.dest_path = dest_path + def save(self): + # type: () -> None + # directory creation is lazy and after the file filtering above + # to ensure we don't install empty dirs; empty dirs can't be + # uninstalled. + parent_dir = os.path.dirname(self.dest_path) + ensure_dir(parent_dir) + + # copyfile (called below) truncates the destination if it + # exists and then writes the new contents. This is fine in most + # cases, but can cause a segfault if pip has loaded a shared + # object (e.g. from pyopenssl through its vendored urllib3) + # Since the shared object is mmap'd an attempt to call a + # symbol in it will then cause a segfault. Unlinking the file + # allows writing of new contents while allowing the process to + # continue to use the old copy. + if os.path.exists(self.dest_path): + os.unlink(self.dest_path) + + # We use copyfile (not move, copy, or copy2) to be extra sure + # that we are not moving directories over (copyfile fails for + # directories) as well as to ensure that we are not copying + # over any metadata because we want more control over what + # metadata we actually copy over. + shutil.copyfile(self.src_path, self.dest_path) + + # Copy over the metadata for the file, currently this only + # includes the atime and mtime. + st = os.stat(self.src_path) + if hasattr(os, "utime"): + os.utime(self.dest_path, (st.st_atime, st.st_mtime)) + + # If our file is executable, then make our destination file + # executable. + if os.access(self.src_path, os.X_OK): + st = os.stat(self.src_path) + permissions = ( + st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + ) + os.chmod(self.dest_path, permissions) + class MissingCallableSuffix(Exception): pass @@ -495,45 +536,7 @@ def clobber( ): # type: (...) -> None for f in files: - # directory creation is lazy and after the file filtering above - # to ensure we don't install empty dirs; empty dirs can't be - # uninstalled. - parent_dir = os.path.dirname(f.dest_path) - ensure_dir(parent_dir) - - # copyfile (called below) truncates the destination if it - # exists and then writes the new contents. This is fine in most - # cases, but can cause a segfault if pip has loaded a shared - # object (e.g. from pyopenssl through its vendored urllib3) - # Since the shared object is mmap'd an attempt to call a - # symbol in it will then cause a segfault. Unlinking the file - # allows writing of new contents while allowing the process to - # continue to use the old copy. - if os.path.exists(f.dest_path): - os.unlink(f.dest_path) - - # We use copyfile (not move, copy, or copy2) to be extra sure - # that we are not moving directories over (copyfile fails for - # directories) as well as to ensure that we are not copying - # over any metadata because we want more control over what - # metadata we actually copy over. - shutil.copyfile(f.src_path, f.dest_path) - - # Copy over the metadata for the file, currently this only - # includes the atime and mtime. - st = os.stat(f.src_path) - if hasattr(os, "utime"): - os.utime(f.dest_path, (st.st_atime, st.st_mtime)) - - # If our file is executable, then make our destination file - # executable. - if os.access(f.src_path, os.X_OK): - st = os.stat(f.src_path) - permissions = ( - st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH - ) - os.chmod(f.dest_path, permissions) - + f.save() changed = False if fixer: changed = fixer(f.dest_path) From 7c4e7ffd5f97f964109c410cd7c6cb3a19bc4eca Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:27:54 -0400 Subject: [PATCH 08/14] Simplify return type for fix_script No caller actually checks for None, so just return False. --- src/pip/_internal/operations/install/wheel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 61818d6eab9..aabfa5ec34f 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -96,13 +96,13 @@ def csv_io_kwargs(mode): def fix_script(path): - # type: (text_type) -> Optional[bool] + # type: (text_type) -> bool """Replace #!python with #!/path/to/python Return True if file was changed. """ # XXX RECORD hashes will need to be updated if not os.path.isfile(path): - return None + return False with open(path, 'rb') as script: firstline = script.readline() From 077de0a4984becefdb9d80d16c6d168e4ab244b7 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:35:13 -0400 Subject: [PATCH 09/14] Move script fixing into separate class This makes `clobber` much simpler, and aligns the interface of root_scheme files and data_scheme files, so we can process them in the same way. --- src/pip/_internal/operations/install/wheel.py | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index aabfa5ec34f..f84ac096cb9 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -393,6 +393,7 @@ def __init__(self, src_path, dest_path): # type: (text_type, text_type) -> None self.src_path = src_path self.dest_path = dest_path + self.changed = False def save(self): # type: () -> None @@ -436,6 +437,18 @@ def save(self): os.chmod(self.dest_path, permissions) +class ScriptFile(File): + def save(self): + # type: () -> None + super(ScriptFile, self).save() + self.changed = fix_script(self.dest_path) + + @classmethod + def from_file(cls, file): + # type: (File) -> ScriptFile + return cls(file.src_path, file.dest_path) + + class MissingCallableSuffix(Exception): pass @@ -532,15 +545,11 @@ def files_to_process( def clobber( files, # type: Iterator[File] - fixer=None, # type: Optional[Callable[[text_type], Any]] ): # type: (...) -> None for f in files: f.save() - changed = False - if fixer: - changed = fixer(f.dest_path) - record_installed(f.src_path, f.dest_path, changed) + record_installed(f.src_path, f.dest_path, f.changed) root_scheme_files = files_to_process( ensure_text(source, encoding=sys.getfilesystemencoding()), @@ -575,12 +584,9 @@ def is_entrypoint_wrapper(name): data_dirs = [s for s in subdirs if s.endswith('.data')] for datadir in data_dirs: - fixer = None filter = None for subdir in os.listdir(os.path.join(wheeldir, datadir)): - fixer = None if subdir == 'scripts': - fixer = fix_script filter = is_entrypoint_wrapper full_datadir_path = os.path.join(wheeldir, datadir, subdir) dest = getattr(scheme, subdir) @@ -592,7 +598,11 @@ def is_entrypoint_wrapper(name): False, filter=filter, ) - clobber(data_scheme_files, fixer=fixer) + if subdir == 'scripts': + data_scheme_files = map( + ScriptFile.from_file, data_scheme_files + ) + clobber(data_scheme_files) def pyc_source_file_paths(): # type: () -> Iterator[text_type] From eebe1226d6fd700158625d919f75fb273f8bc97f Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:39:19 -0400 Subject: [PATCH 10/14] Make is_entrypoint_wrapper take a path instead of a name This reduces coupling between is_entrypoint_wrapper and the file generation step, and we'll be able to reuse it for processing on files. --- src/pip/_internal/operations/install/wheel.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index f84ac096cb9..660ff2c8dc7 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -536,11 +536,11 @@ def files_to_process( s for s in subdirs if not s.endswith('.data') ] for f in files: - # Skip unwanted files - if filter and filter(f): - continue srcfile = os.path.join(dir, f) destfile = os.path.join(dest, basedir, f) + # Skip unwanted files + if filter and filter(srcfile): + continue yield File(srcfile, destfile) def clobber( @@ -564,10 +564,11 @@ def clobber( ) console, gui = get_entrypoints(distribution) - def is_entrypoint_wrapper(name): + def is_entrypoint_wrapper(path): # type: (text_type) -> bool # EP, EP.exe and EP-script.py are scripts generated for # entry point EP by setuptools + name = os.path.basename(path) if name.lower().endswith('.exe'): matchname = name[:-4] elif name.lower().endswith('-script.py'): From 86b68ede1dddf7ae6b3463770daf3a7756533d49 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:43:02 -0400 Subject: [PATCH 11/14] Inline filter in data scheme file calculation Since this is the only place this parameter is used, we can inline the filtering directly. --- src/pip/_internal/operations/install/wheel.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 660ff2c8dc7..494d89912d7 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -46,7 +46,6 @@ from email.message import Message from typing import ( Any, - Callable, Dict, IO, Iterable, @@ -526,7 +525,6 @@ def files_to_process( source, # type: text_type dest, # type: text_type is_base, # type: bool - filter=None, # type: Optional[Callable[[text_type], bool]] ): # type: (...) -> Iterable[File] for dir, subdirs, files in os.walk(source): @@ -538,9 +536,6 @@ def files_to_process( for f in files: srcfile = os.path.join(dir, f) destfile = os.path.join(dest, basedir, f) - # Skip unwanted files - if filter and filter(srcfile): - continue yield File(srcfile, destfile) def clobber( @@ -585,10 +580,7 @@ def is_entrypoint_wrapper(path): data_dirs = [s for s in subdirs if s.endswith('.data')] for datadir in data_dirs: - filter = None for subdir in os.listdir(os.path.join(wheeldir, datadir)): - if subdir == 'scripts': - filter = is_entrypoint_wrapper full_datadir_path = os.path.join(wheeldir, datadir, subdir) dest = getattr(scheme, subdir) data_scheme_files = files_to_process( @@ -597,9 +589,12 @@ def is_entrypoint_wrapper(path): ), ensure_text(dest, encoding=sys.getfilesystemencoding()), False, - filter=filter, ) if subdir == 'scripts': + data_scheme_files = [ + f for f in data_scheme_files + if not is_entrypoint_wrapper(f.src_path) + ] data_scheme_files = map( ScriptFile.from_file, data_scheme_files ) From 6a87a15db12395315895834eb45b6c4f0c8e6734 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:44:31 -0400 Subject: [PATCH 12/14] Reverse is_entrypoint_wrapper for use with filter This lets us get rid of our custom list comprehension in favor of the lazy filter builtin. --- src/pip/_internal/operations/install/wheel.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 494d89912d7..5706823b46f 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -559,10 +559,11 @@ def clobber( ) console, gui = get_entrypoints(distribution) - def is_entrypoint_wrapper(path): - # type: (text_type) -> bool + def not_entrypoint_wrapper(file): + # type: (File) -> bool # EP, EP.exe and EP-script.py are scripts generated for # entry point EP by setuptools + path = file.src_path name = os.path.basename(path) if name.lower().endswith('.exe'): matchname = name[:-4] @@ -573,7 +574,7 @@ def is_entrypoint_wrapper(path): else: matchname = name # Ignore setuptools-generated scripts - return (matchname in console or matchname in gui) + return not (matchname in console or matchname in gui) # Zip file path separators must be / subdirs = set(p.split("/", 1)[0] for p in wheel_zip.namelist()) @@ -591,10 +592,9 @@ def is_entrypoint_wrapper(path): False, ) if subdir == 'scripts': - data_scheme_files = [ - f for f in data_scheme_files - if not is_entrypoint_wrapper(f.src_path) - ] + data_scheme_files = filter( + not_entrypoint_wrapper, data_scheme_files + ) data_scheme_files = map( ScriptFile.from_file, data_scheme_files ) From 4943ec30cfbadaafafa065f7ee02afede4a4b4fc Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:49:22 -0400 Subject: [PATCH 13/14] Combine processing of root- and data-scheme files This refactoring makes it so that the individual contributors to the files we need to handle can be broken into their own separate functions, similar to the approach we took for console scripts. --- src/pip/_internal/operations/install/wheel.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 5706823b46f..55bd2c78780 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -16,7 +16,7 @@ import sys import warnings from base64 import urlsafe_b64encode -from itertools import starmap +from itertools import chain, starmap from zipfile import ZipFile from pip._vendor import pkg_resources @@ -546,12 +546,11 @@ def clobber( f.save() record_installed(f.src_path, f.dest_path, f.changed) - root_scheme_files = files_to_process( + files = files_to_process( ensure_text(source, encoding=sys.getfilesystemencoding()), ensure_text(lib_dir, encoding=sys.getfilesystemencoding()), True, ) - clobber(root_scheme_files) # Get the defined entry points distribution = pkg_resources_distribution_for_wheel( @@ -598,7 +597,10 @@ def not_entrypoint_wrapper(file): data_scheme_files = map( ScriptFile.from_file, data_scheme_files ) - clobber(data_scheme_files) + + files = chain(files, data_scheme_files) + + clobber(files) def pyc_source_file_paths(): # type: () -> Iterator[text_type] From ad0f7b9bb9e38bc1ba860c32a3326e2d56b5e3c4 Mon Sep 17 00:00:00 2001 From: Chris Hunt Date: Sun, 5 Jul 2020 12:54:47 -0400 Subject: [PATCH 14/14] Inline clobber --- src/pip/_internal/operations/install/wheel.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index 55bd2c78780..a9c41b0b08b 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -538,14 +538,6 @@ def files_to_process( destfile = os.path.join(dest, basedir, f) yield File(srcfile, destfile) - def clobber( - files, # type: Iterator[File] - ): - # type: (...) -> None - for f in files: - f.save() - record_installed(f.src_path, f.dest_path, f.changed) - files = files_to_process( ensure_text(source, encoding=sys.getfilesystemencoding()), ensure_text(lib_dir, encoding=sys.getfilesystemencoding()), @@ -600,7 +592,9 @@ def not_entrypoint_wrapper(file): files = chain(files, data_scheme_files) - clobber(files) + for file in files: + file.save() + record_installed(file.src_path, file.dest_path, file.changed) def pyc_source_file_paths(): # type: () -> Iterator[text_type]