Skip to content

Commit 7a2abf4

Browse files
committed
warn before overwriting files owned by previously installed wheels
Before installing a wheel, look for other distributions that have also written files into the same top level directory. Warn if any of the same files will be modified by the new wheel. Addresses #4625 Signed-off-by: Doug Hellmann <[email protected]>
1 parent 9999f0e commit 7a2abf4

File tree

3 files changed

+161
-0
lines changed

3 files changed

+161
-0
lines changed

news/4625.bugfix

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Add a FutureWarning when installing a wheel will overwrite a file
2+
owned by another distribution.

src/pip/_internal/operations/install/wheel.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pip._vendor import pkg_resources
2222
from pip._vendor.distlib.scripts import ScriptMaker
2323
from pip._vendor.distlib.util import get_export_entry
24+
from pip._vendor.packaging.utils import canonicalize_name
2425
from pip._vendor.six import (
2526
PY2,
2627
StringIO,
@@ -326,6 +327,104 @@ def make(self, specification, options=None):
326327
return super(PipScriptMaker, self).make(specification, options)
327328

328329

330+
def _get_file_owners(
331+
lib_dir, # type: str
332+
top_level, # type: str
333+
ignore_name, # type: str
334+
):
335+
# type: (...) -> Dict[str, List[str]]
336+
"""Return dict mapping distributions to files under a top level directory
337+
338+
Look through lib_dir for distributions that own files under the
339+
top_level directory specified. Skip distributions that match the
340+
ignore_name. Return a dict mapping filenames to the distributions
341+
that own them.
342+
343+
"""
344+
file_owners = {} # type: Dict[str, List[str]]
345+
existing_env = pkg_resources.Environment([lib_dir])
346+
canonical_name = canonicalize_name(ignore_name)
347+
for existing_dist_name in existing_env:
348+
for d in existing_env[existing_dist_name]:
349+
if canonicalize_name(d.project_name) == canonical_name:
350+
continue
351+
352+
existing_info_dir = os.path.join(
353+
lib_dir,
354+
'{}-{}.dist-info'.format(d.project_name, d.version),
355+
)
356+
357+
top_level_path = os.path.join(existing_info_dir, 'top_level.txt')
358+
if not os.path.exists(top_level_path):
359+
continue
360+
with open(top_level_path, 'r') as f:
361+
existing_top_level = f.read().strip()
362+
if existing_top_level != top_level:
363+
continue
364+
365+
record_path = os.path.join(existing_info_dir, 'RECORD')
366+
if not os.path.exists(record_path):
367+
continue
368+
with open(record_path, **csv_io_kwargs('r')) as record_file:
369+
for row in csv.reader(record_file):
370+
# Normalize the path before saving the owners,
371+
# since the record always contains forward
372+
# slashes, but when we look for a path later we
373+
# will be using a value with the native path
374+
# separator.
375+
o = file_owners.setdefault(os.path.normpath(row[0]), [])
376+
o.append(str(d))
377+
378+
return file_owners
379+
380+
381+
def _report_file_owner_conflicts(
382+
lib_dir, # type: str
383+
name, # type: str
384+
source_dir, # type: str
385+
info_dir, # type: str
386+
):
387+
# type: (...) -> None
388+
"""Report files owned by other distributions that are being overwritten.
389+
390+
Scan the lib_dir for distributions that own files under the same
391+
top level directory as the wheel being installed and report any
392+
files owned by those other distributions that are going to be
393+
overwritten.
394+
395+
"""
396+
installing_top_level_path = os.path.join(
397+
source_dir, info_dir, 'top_level.txt')
398+
if not os.path.exists(installing_top_level_path):
399+
# We cannot determine the top level directory, so there is no
400+
# point in continuing.
401+
return
402+
403+
with open(installing_top_level_path, 'r') as fd:
404+
installing_top_level = fd.read().strip()
405+
files_from_other_owners = _get_file_owners(
406+
lib_dir, installing_top_level, name)
407+
if not files_from_other_owners:
408+
# Nothing else owns files under this top level directory, so
409+
# we don't need to scan the source.
410+
return
411+
412+
for dir, subdirs, files in os.walk(source_dir):
413+
basedir = dir[len(source_dir):].lstrip(os.path.sep)
414+
for f in files:
415+
partial_src = os.path.join(basedir, f)
416+
if partial_src not in files_from_other_owners:
417+
# There are no other owners for this file.
418+
continue
419+
destfile = os.path.join(lib_dir, basedir, f)
420+
for owner in files_from_other_owners[partial_src]:
421+
warnings.warn(
422+
'Overwriting or removing {} for {} '
423+
'which is also owned by {}'.format(
424+
destfile, name, owner),
425+
FutureWarning)
426+
427+
329428
def install_unpacked_wheel(
330429
name, # type: str
331430
wheeldir, # type: str
@@ -458,6 +557,12 @@ def clobber(
458557
changed = fixer(destfile)
459558
record_installed(srcfile, destfile, changed)
460559

560+
# Look for packages containing files that are already using the
561+
# same toplevel directory as the wheel we are installing but that
562+
# have a different dist name. Do this before calling clobber(), so
563+
# that when the warning is eventually changed to a hard error no
564+
# partial installation occurs.
565+
_report_file_owner_conflicts(lib_dir, name, source, info_dir)
461566
clobber(
462567
ensure_text(source, encoding=sys.getfilesystemencoding()),
463568
ensure_text(lib_dir, encoding=sys.getfilesystemencoding()),

tests/functional/test_install_wheel.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,3 +619,57 @@ def test_wheel_install_fails_with_badly_encoded_metadata(script):
619619
assert "Error decoding metadata for" in result.stderr
620620
assert "simple-0.1.0-py2.py3-none-any.whl" in result.stderr
621621
assert "METADATA" in result.stderr
622+
623+
624+
def test_report_file_owner_conflicts(script, tmpdir):
625+
"""
626+
Test installing from a wheel that wants to overwrite
627+
files owned by another wheel to ensure that a warning
628+
is reported and that the second wheel does overwrite
629+
the first.
630+
"""
631+
from tests.lib import TestFailure
632+
633+
make_wheel_with_file(
634+
name="wheel1",
635+
version="1.0",
636+
extra_files={
637+
'thetop/a.py': '# from wheel1',
638+
},
639+
extra_metadata_files={
640+
'top_level.txt': 'thetop',
641+
},
642+
).save_to_dir(tmpdir)
643+
644+
make_wheel_with_file(
645+
name="wheel2",
646+
version="2.0",
647+
extra_files={
648+
'thetop/a.py': '# from wheel2',
649+
},
650+
extra_metadata_files={
651+
'top_level.txt': 'thetop',
652+
},
653+
).save_to_dir(tmpdir)
654+
655+
result = script.pip(
656+
'install', 'wheel1', 'wheel2', '--no-index',
657+
'--find-links', tmpdir,
658+
)
659+
660+
# FIXME: Both wheels should be installed, for now. In the future
661+
# when the warning is changed to a hard error this part of the
662+
# test needs to be updated to show that only wheel1 is installed.
663+
with pytest.raises(TestFailure):
664+
result.assert_installed('wheel1')
665+
result.assert_installed('wheel2')
666+
667+
# FIXME: When the warning is changed to a hard error, wheel2 won't
668+
# be installed and the file contents will be those from wheel1.
669+
thetop = script.site_packages_path / 'thetop'
670+
assert (thetop / 'a.py').read_text() == '# from wheel2'
671+
672+
full_path = thetop / 'a.py'
673+
msg = 'Overwriting or removing {} for {} which is also owned by {}'.format(
674+
full_path, 'wheel2', 'wheel1 1.0')
675+
assert msg in result.stderr

0 commit comments

Comments
 (0)