Skip to content

Refactor BaseDistribution.metadata property to make sure it has Requires-Dist and Provides-Extra #11163

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 91 additions & 1 deletion src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import csv
import email.message
import functools
import json
import logging
import pathlib
Expand All @@ -13,6 +14,7 @@
Iterable,
Iterator,
List,
NamedTuple,
Optional,
Tuple,
Union,
Expand All @@ -33,6 +35,7 @@
from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here.
from pip._internal.utils.egg_link import egg_link_path_from_sys_path
from pip._internal.utils.misc import is_local, normalize_path
from pip._internal.utils.packaging import safe_extra
from pip._internal.utils.urls import url_to_path

if TYPE_CHECKING:
Expand Down Expand Up @@ -91,6 +94,12 @@ def _convert_installed_files_path(
return str(pathlib.Path(*info, *entry))


class RequiresEntry(NamedTuple):
    """One dependency line parsed from an egg-info ``requires.txt``."""

    # The requirement string exactly as it appears on the line.
    requirement: str
    # Extra named by the enclosing section header; "" when there is none.
    extra: str
    # Environment marker from the enclosing section header; "" when there is none.
    marker: str


class BaseDistribution(Protocol):
@classmethod
def from_directory(cls, directory: str) -> "BaseDistribution":
Expand Down Expand Up @@ -348,6 +357,17 @@ def read_text(self, path: InfoPath) -> str:
def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
    """Return the entry points of this distribution.

    This base definition is a placeholder that concrete backends override.

    :raises NotImplementedError: Always, in this base definition.
    """
    raise NotImplementedError()

def _metadata_impl(self) -> email.message.Message:
raise NotImplementedError()

@functools.lru_cache(maxsize=1)
def _metadata_cached(self) -> email.message.Message:
# When we drop python 3.7 support, move this to the metadata property and use
# functools.cached_property instead of lru_cache.
metadata = self._metadata_impl()
self._add_egg_info_requires(metadata)
return metadata

@property
def metadata(self) -> email.message.Message:
"""Metadata of distribution parsed from e.g. METADATA or PKG-INFO.
Expand All @@ -357,7 +377,7 @@ def metadata(self) -> email.message.Message:
:raises NoneMetadataError: If the metadata file is available, but does
not contain valid metadata.
"""
raise NotImplementedError()
return self._metadata_cached()

@property
def metadata_version(self) -> Optional[str]:
Expand Down Expand Up @@ -451,6 +471,76 @@ def iter_declared_entries(self) -> Optional[Iterator[str]]:
or self._iter_declared_entries_from_legacy()
)

def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
    """Yield the dependency entries stored in an egg-info ``requires.txt``.

    The file is an INI-ish format in which an egg-info stores dependencies. A
    ``[extra:marker]`` section header names the extra and/or the environment
    marker applying to the lines below it, and every other line is an
    arbitrary string (not a key-value pair) holding one dependency as a
    requirement string without markers.

    ``importlib.metadata`` has a construct called ``Sectioned`` that does
    mostly the same thing, but it is currently considered private.

    Nothing is yielded when ``requires.txt`` does not exist.
    """
    try:
        text = self.read_text("requires.txt")
    except FileNotFoundError:
        return
    # Entries before the first section header have neither extra nor marker.
    current_extra = current_marker = ""
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if stripped == "" or stripped.startswith("#"):
            # Blank line or comment; ignored.
            continue
        is_section_header = stripped.startswith("[") and stripped.endswith("]")
        if not is_section_header:
            yield RequiresEntry(
                requirement=stripped,
                extra=current_extra,
                marker=current_marker,
            )
            continue
        # "[extra:marker]" -> ("extra", "marker"); either part may be empty.
        current_extra, _, current_marker = stripped.strip("[]").partition(":")

def _iter_egg_info_extras(self) -> Iterable[str]:
"""Get extras from the egg-info directory."""
known_extras = {""}
for entry in self._iter_requires_txt_entries():
if entry.extra in known_extras:
continue
known_extras.add(entry.extra)
yield entry.extra

def _iter_egg_info_dependencies(self) -> Iterable[str]:
"""Get distribution dependencies from the egg-info directory.

To ease parsing, this converts a legacy dependency entry into a PEP 508
requirement string. Like ``_iter_requires_txt_entries()``, there is code
in ``importlib.metadata`` that does mostly the same, but not do exactly
what we need.

Namely, ``importlib.metadata`` does not normalize the extra name before
putting it into the requirement string, which causes marker comparison
to fail because the dist-info format do normalize. This is consistent in
all currently available PEP 517 backends, although not standardized.
"""
for entry in self._iter_requires_txt_entries():
if entry.extra and entry.marker:
marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"'
elif entry.extra:
marker = f'extra == "{safe_extra(entry.extra)}"'
elif entry.marker:
marker = entry.marker
else:
marker = ""
if marker:
yield f"{entry.requirement} ; {marker}"
else:
yield entry.requirement

def _add_egg_info_requires(self, metadata: email.message.Message) -> None:
"""Add egg-info requires.txt information to the metadata."""
if not metadata.get_all("Requires-Dist"):
for dep in self._iter_egg_info_dependencies():
metadata["Requires-Dist"] = dep
if not metadata.get_all("Provides-Extra"):
for extra in self._iter_egg_info_extras():
metadata["Provides-Extra"] = extra


class BaseEnvironment:
"""An environment containing distributions to introspect."""
Expand Down
92 changes: 5 additions & 87 deletions src/pip/_internal/metadata/importlib/_dists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,7 @@
import os
import pathlib
import zipfile
from typing import (
Collection,
Dict,
Iterable,
Iterator,
Mapping,
NamedTuple,
Optional,
Sequence,
)
from typing import Collection, Dict, Iterable, Iterator, Mapping, Optional, Sequence

from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
Expand Down Expand Up @@ -92,12 +83,6 @@ def read_text(self, filename: str) -> Optional[str]:
return text


class RequiresEntry(NamedTuple):
    """One dependency line parsed from an egg-info ``requires.txt``."""

    # The requirement string exactly as it appears on the line.
    requirement: str
    # Extra named by the enclosing section header; "" when there is none.
    extra: str
    # Environment marker from the enclosing section header; "" when there is none.
    marker: str


class Distribution(BaseDistribution):
def __init__(
self,
Expand Down Expand Up @@ -187,84 +172,17 @@ def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
# importlib.metadata's EntryPoint structure satisfies BaseEntryPoint.
return self._dist.entry_points

@property
def metadata(self) -> email.message.Message:
def _metadata_impl(self) -> email.message.Message:
return self._dist.metadata

def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]:
    """Yield the dependency entries stored in an egg-info ``requires.txt``.

    The file is an INI-ish format in which an egg-info stores dependencies. A
    ``[extra:marker]`` section header names the extra and/or the environment
    marker applying to the lines below it, and every other line is an
    arbitrary string (not a key-value pair) holding one dependency as a
    requirement string without markers.

    ``importlib.metadata`` has a construct called ``Sectioned`` that does
    mostly the same thing, but it is currently considered private.

    Nothing is yielded when reading ``requires.txt`` gives ``None``.
    """
    text = self._dist.read_text("requires.txt")
    if text is None:
        return
    # Entries before the first section header have neither extra nor marker.
    current_extra = current_marker = ""
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if stripped == "" or stripped.startswith("#"):
            # Blank line or comment; ignored.
            continue
        is_section_header = stripped.startswith("[") and stripped.endswith("]")
        if not is_section_header:
            yield RequiresEntry(
                requirement=stripped,
                extra=current_extra,
                marker=current_marker,
            )
            continue
        # "[extra:marker]" -> ("extra", "marker"); either part may be empty.
        current_extra, _, current_marker = stripped.strip("[]").partition(":")

def _iter_egg_info_extras(self) -> Iterable[str]:
"""Get extras from the egg-info directory."""
known_extras = {""}
for entry in self._iter_requires_txt_entries():
if entry.extra in known_extras:
continue
known_extras.add(entry.extra)
yield entry.extra

def iter_provided_extras(self) -> Iterable[str]:
iterator = (
self._dist.metadata.get_all("Provides-Extra")
or self._iter_egg_info_extras()
return (
safe_extra(extra) for extra in self.metadata.get_all("Provides-Extra", [])
)
return (safe_extra(extra) for extra in iterator)

def _iter_egg_info_dependencies(self) -> Iterable[str]:
"""Get distribution dependencies from the egg-info directory.

To ease parsing, this converts a legacy dependency entry into a PEP 508
requirement string. Like ``_iter_requires_txt_entries()``, there is code
in ``importlib.metadata`` that does mostly the same, but not do exactly
what we need.

Namely, ``importlib.metadata`` does not normalize the extra name before
putting it into the requirement string, which causes marker comparison
to fail because the dist-info format do normalize. This is consistent in
all currently available PEP 517 backends, although not standardized.
"""
for entry in self._iter_requires_txt_entries():
if entry.extra and entry.marker:
marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"'
elif entry.extra:
marker = f'extra == "{safe_extra(entry.extra)}"'
elif entry.marker:
marker = entry.marker
else:
marker = ""
if marker:
yield f"{entry.requirement} ; {marker}"
else:
yield entry.requirement

def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]:
req_string_iterator = (
self._dist.metadata.get_all("Requires-Dist")
or self._iter_egg_info_dependencies()
)
contexts: Sequence[Dict[str, str]] = [{"extra": safe_extra(e)} for e in extras]
for req_string in req_string_iterator:
for req_string in self.metadata.get_all("Requires-Dist", []):
req = Requirement(req_string)
if not req.marker:
yield req
Expand Down
3 changes: 1 addition & 2 deletions src/pip/_internal/metadata/pkg_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,7 @@ def iter_entry_points(self) -> Iterable[BaseEntryPoint]:
name, _, value = str(entry_point).partition("=")
yield EntryPoint(name=name.strip(), value=value.strip(), group=group)

@property
def metadata(self) -> email.message.Message:
def _metadata_impl(self) -> email.message.Message:
"""
:raises NoneMetadataError: if the distribution reports `has_metadata()`
True but `get_metadata()` returns None.
Expand Down
29 changes: 28 additions & 1 deletion tests/unit/metadata/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from pathlib import Path
from typing import cast
from unittest import mock

import pytest
from pip._vendor.packaging.utils import NormalizedName

from pip._internal.metadata import BaseDistribution
from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, ArchiveInfo


Expand Down Expand Up @@ -40,6 +41,32 @@ class FakeDistribution(BaseDistribution):
)


def test_metadata_reads_egg_info_requires_txt(tmp_path: Path) -> None:
    """Requires-Dist comes from requires.txt when PKG-INFO does not have it."""
    egg_info_dir = tmp_path / "whatever.egg-info"
    egg_info_dir.mkdir()
    distribution = get_directory_distribution(str(egg_info_dir))
    assert distribution.installed_with_setuptools_egg_info
    # The metadata files are written only after the distribution is created.
    (egg_info_dir / "PKG-INFO").write_text("Name: whatever\n")
    (egg_info_dir / "requires.txt").write_text("pkga\npkgb\n")
    requires = distribution.metadata.get_all("Requires-Dist")
    assert requires == ["pkga", "pkgb"]


def test_metadata_pkg_info_requires_priority(tmp_path: Path) -> None:
    """Requires-Dist in PKG-INFO wins over the entries in requires.txt."""
    egg_info_dir = tmp_path / "whatever.egg-info"
    egg_info_dir.mkdir()
    distribution = get_directory_distribution(str(egg_info_dir))
    assert distribution.installed_with_setuptools_egg_info
    # The metadata files are written only after the distribution is created.
    (egg_info_dir / "PKG-INFO").write_text(
        "Name: whatever\nRequires-Dist: pkgc\nRequires-Dist: pkgd\n"
    )
    (egg_info_dir / "requires.txt").write_text("pkga\npkgb\n")
    requires = distribution.metadata.get_all("Requires-Dist")
    assert requires == ["pkgc", "pkgd"]


@mock.patch.object(
BaseDistribution,
"read_text",
Expand Down