Skip to content

Decouple finding, processing, and saving files when installing wheels #8545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 93 additions & 73 deletions src/pip/_internal/operations/install/wheel.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import sys
import warnings
from base64 import urlsafe_b64encode
from itertools import starmap
from itertools import chain, starmap
from zipfile import ZipFile

from pip._vendor import pkg_resources
Expand Down Expand Up @@ -46,7 +46,6 @@
from email.message import Message
from typing import (
Any,
Callable,
Dict,
IO,
Iterable,
Expand Down Expand Up @@ -96,13 +95,13 @@ def csv_io_kwargs(mode):


def fix_script(path):
# type: (text_type) -> Optional[bool]
# type: (text_type) -> bool
"""Replace #!python with #!/path/to/python
Return True if file was changed.
"""
# XXX RECORD hashes will need to be updated
if not os.path.isfile(path):
return None
return False

with open(path, 'rb') as script:
firstline = script.readline()
Expand Down Expand Up @@ -388,6 +387,67 @@ def get_console_script_specs(console):
return scripts_to_generate


class File(object):
    """A single file to copy from ``src_path`` to ``dest_path``.

    ``changed`` starts out False and is never set by this class itself;
    subclasses such as ScriptFile may update it in ``save()``.
    """

    def __init__(self, src_path, dest_path):
        # type: (text_type, text_type) -> None
        """Remember the source and destination paths; nothing is copied yet."""
        self.src_path = src_path
        self.dest_path = dest_path
        self.changed = False

    def save(self):
        # type: () -> None
        """Copy the source file to the destination path.

        Ensures the destination's parent directory, removes any existing
        destination file, copies the contents, then carries over the
        atime/mtime and (if the source is executable) the executable bits.
        """
        # Directory creation is lazy -- it only happens once a file is
        # actually being saved -- to ensure we don't install empty dirs;
        # empty dirs can't be uninstalled.
        parent_dir = os.path.dirname(self.dest_path)
        ensure_dir(parent_dir)

        # copyfile (called below) truncates the destination if it
        # exists and then writes the new contents. This is fine in most
        # cases, but can cause a segfault if pip has loaded a shared
        # object (e.g. from pyopenssl through its vendored urllib3).
        # Since the shared object is mmap'd, an attempt to call a
        # symbol in it will then cause a segfault. Unlinking the file
        # allows writing of new contents while allowing the process to
        # continue to use the old copy.
        if os.path.exists(self.dest_path):
            os.unlink(self.dest_path)

        # We use copyfile (not move, copy, or copy2) to be extra sure
        # that we are not moving directories over (copyfile fails for
        # directories) as well as to ensure that we are not copying
        # over any metadata because we want more control over what
        # metadata we actually copy over.
        shutil.copyfile(self.src_path, self.dest_path)

        # A single stat of the source serves both the timestamp copy and
        # the permission computation below.
        st = os.stat(self.src_path)

        # Copy over the metadata for the file, currently this only
        # includes the atime and mtime.
        if hasattr(os, "utime"):
            os.utime(self.dest_path, (st.st_atime, st.st_mtime))

        # If our file is executable, then make our destination file
        # executable.
        if os.access(self.src_path, os.X_OK):
            permissions = (
                st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
            )
            os.chmod(self.dest_path, permissions)


class ScriptFile(File):
    """A File whose saved copy is additionally passed through fix_script."""

    @classmethod
    def from_file(cls, file):
        # type: (File) -> ScriptFile
        """Build a ScriptFile with the same source/destination as ``file``."""
        source, destination = file.src_path, file.dest_path
        return cls(source, destination)

    def save(self):
        # type: () -> None
        """Save like File, then run fix_script on the destination.

        The result of fix_script is stored in ``self.changed``.
        """
        super(ScriptFile, self).save()
        was_rewritten = fix_script(self.dest_path)
        self.changed = was_rewritten


class MissingCallableSuffix(Exception):
pass

Expand Down Expand Up @@ -461,71 +521,24 @@ def record_installed(srcfile, destfile, modified=False):
if modified:
changed.add(_fs_to_record_path(destfile))

def clobber(
source, # type: text_type
dest, # type: text_type
is_base, # type: bool
fixer=None, # type: Optional[Callable[[text_type], Any]]
filter=None # type: Optional[Callable[[text_type], bool]]
def files_to_process(
source, # type: text_type
dest, # type: text_type
is_base, # type: bool
):
# type: (...) -> None
ensure_dir(dest) # common for the 'include' path

# type: (...) -> Iterable[File]
for dir, subdirs, files in os.walk(source):
basedir = dir[len(source):].lstrip(os.path.sep)
destdir = os.path.join(dest, basedir)
if is_base and basedir == '':
subdirs[:] = [s for s in subdirs if not s.endswith('.data')]
subdirs[:] = [
s for s in subdirs if not s.endswith('.data')
]
for f in files:
# Skip unwanted files
if filter and filter(f):
continue
srcfile = os.path.join(dir, f)
destfile = os.path.join(dest, basedir, f)
# directory creation is lazy and after the file filtering above
# to ensure we don't install empty dirs; empty dirs can't be
# uninstalled.
ensure_dir(destdir)

# copyfile (called below) truncates the destination if it
# exists and then writes the new contents. This is fine in most
# cases, but can cause a segfault if pip has loaded a shared
# object (e.g. from pyopenssl through its vendored urllib3)
# Since the shared object is mmap'd an attempt to call a
# symbol in it will then cause a segfault. Unlinking the file
# allows writing of new contents while allowing the process to
# continue to use the old copy.
if os.path.exists(destfile):
os.unlink(destfile)

# We use copyfile (not move, copy, or copy2) to be extra sure
# that we are not moving directories over (copyfile fails for
# directories) as well as to ensure that we are not copying
# over any metadata because we want more control over what
# metadata we actually copy over.
shutil.copyfile(srcfile, destfile)

# Copy over the metadata for the file, currently this only
# includes the atime and mtime.
st = os.stat(srcfile)
if hasattr(os, "utime"):
os.utime(destfile, (st.st_atime, st.st_mtime))

# If our file is executable, then make our destination file
# executable.
if os.access(srcfile, os.X_OK):
st = os.stat(srcfile)
permissions = (
st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
)
os.chmod(destfile, permissions)
yield File(srcfile, destfile)

changed = False
if fixer:
changed = fixer(destfile)
record_installed(srcfile, destfile, changed)

clobber(
files = files_to_process(
ensure_text(source, encoding=sys.getfilesystemencoding()),
ensure_text(lib_dir, encoding=sys.getfilesystemencoding()),
True,
Expand All @@ -537,10 +550,12 @@ def clobber(
)
console, gui = get_entrypoints(distribution)

def is_entrypoint_wrapper(name):
# type: (text_type) -> bool
def not_entrypoint_wrapper(file):
# type: (File) -> bool
# EP, EP.exe and EP-script.py are scripts generated for
# entry point EP by setuptools
path = file.src_path
name = os.path.basename(path)
if name.lower().endswith('.exe'):
matchname = name[:-4]
elif name.lower().endswith('-script.py'):
Expand All @@ -550,31 +565,36 @@ def is_entrypoint_wrapper(name):
else:
matchname = name
# Ignore setuptools-generated scripts
return (matchname in console or matchname in gui)
return not (matchname in console or matchname in gui)

# Zip file path separators must be /
subdirs = set(p.split("/", 1)[0] for p in wheel_zip.namelist())
data_dirs = [s for s in subdirs if s.endswith('.data')]

for datadir in data_dirs:
fixer = None
filter = None
for subdir in os.listdir(os.path.join(wheeldir, datadir)):
fixer = None
if subdir == 'scripts':
fixer = fix_script
filter = is_entrypoint_wrapper
full_datadir_path = os.path.join(wheeldir, datadir, subdir)
dest = getattr(scheme, subdir)
clobber(
data_scheme_files = files_to_process(
ensure_text(
full_datadir_path, encoding=sys.getfilesystemencoding()
),
ensure_text(dest, encoding=sys.getfilesystemencoding()),
False,
fixer=fixer,
filter=filter,
)
if subdir == 'scripts':
data_scheme_files = filter(
not_entrypoint_wrapper, data_scheme_files
)
data_scheme_files = map(
ScriptFile.from_file, data_scheme_files
)

files = chain(files, data_scheme_files)

for file in files:
file.save()
record_installed(file.src_path, file.dest_path, file.changed)

def pyc_source_file_paths():
# type: () -> Iterator[text_type]
Expand Down