Skip to content

Commit 36b0fa8

Browse files
make LinkMetadataCache
- catch an exception when parsing metadata which only occurs in CI
- handle --no-cache-dir
- call os.makedirs() before writing to cache too
- catch InvalidSchema when attempting git urls with BatchDownloader
- fix other test failures
- reuse should_cache(req) logic
- gzip compress link metadata for a slight reduction in disk space
- only cache built sdists
- don't check should_cache() when fetching
- cache lazy wheel dists
- add news
- turn debug logs in fetching from cache into exceptions
- use scandir over listdir when searching normal wheel cache
- handle metadata email parsing errors
- correctly handle mutable cached requirement
- use bz2 over gzip for an extremely slight improvement in disk usage
1 parent dc4582a commit 36b0fa8

21 files changed

+391
-153
lines changed

news/12256.feature.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Cache computed metadata from sdists and lazy wheels in ``~/.cache/pip/link-metadata`` when ``--use-feature=metadata-cache`` is enabled.

src/pip/_internal/cache.py

Lines changed: 102 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
"""Cache Management
22
"""
33

4+
import abc
45
import hashlib
56
import json
67
import logging
78
import os
9+
import re
810
from pathlib import Path
9-
from typing import Any, Dict, List, Optional
11+
from typing import Dict, Iterator, List, Optional, Tuple
1012

1113
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
1214
from pip._vendor.packaging.utils import canonicalize_name
@@ -15,21 +17,71 @@
1517
from pip._internal.models.direct_url import DirectUrl
1618
from pip._internal.models.link import Link
1719
from pip._internal.models.wheel import Wheel
20+
from pip._internal.req.req_install import InstallRequirement
1821
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
1922
from pip._internal.utils.urls import path_to_url
23+
from pip._internal.vcs import vcs
2024

2125
logger = logging.getLogger(__name__)
2226

27+
_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
28+
2329
ORIGIN_JSON_NAME = "origin.json"
2430

2531

32+
def _contains_egg_info(s: str) -> bool:
    """Determine whether the string looks like an egg_info.

    :param s: The string to parse. E.g. foo-2.1
    :return: ``True`` when ``_egg_info_re`` matches somewhere in ``s``.
    """
    # ``search`` rather than ``match``: the name-version pair may appear at
    # any position in the string, not only at its start.
    return _egg_info_re.search(s) is not None
38+
39+
40+
def should_cache(
    req: InstallRequirement,
) -> bool:
    """Return whether a built InstallRequirement can be stored in the
    persistent wheel cache.

    This assumes the wheel cache is available, and that _should_build() has
    determined a wheel needs to be built.

    :param req: The requirement whose link, editable flag and source
        directory are inspected.
    :return: ``False`` for requirements without a link, for wheel links, for
        editable requirements and for requirements without a source
        directory. For VCS links, the answer of the VCS backend's
        ``is_immutable_rev_checkout()``. Otherwise, whether the link's base
        name looks like an egg_info (``name-version``).
    """
    if not req.link:
        return False

    if req.link.is_wheel:
        return False

    if req.editable or not req.source_dir:
        # Never cache editable requirements, nor requirements that have no
        # source directory.
        return False

    if req.link.is_vcs:
        # VCS checkout. Do not cache unless it points to an immutable
        # commit hash.
        vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
        assert vcs_backend
        return bool(
            vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir)
        )

    # Cache only when the link's base name carries a name-version pair;
    # otherwise, do not cache.
    base, _ext = req.link.splitext()
    return _contains_egg_info(base)
76+
77+
2678
def _hash_dict(d: Dict[str, str]) -> str:
2779
"""Return a stable sha224 of a dictionary."""
2880
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
2981
return hashlib.sha224(s.encode("ascii")).hexdigest()
3082

3183

32-
class Cache:
84+
class Cache(abc.ABC):
3385
"""An abstract class - provides cache directories for data from links
3486
3587
:param cache_dir: The root of the cache.
@@ -73,20 +125,28 @@ def _get_cache_path_parts(self, link: Link) -> List[str]:
73125

74126
return parts
75127

76-
def _get_candidates(self, link: Link, canonical_package_name: str) -> List[Any]:
77-
can_not_cache = not self.cache_dir or not canonical_package_name or not link
78-
if can_not_cache:
79-
return []
128+
@abc.abstractmethod
129+
def get_path_for_link(self, link: Link) -> str:
130+
"""Return a directory to store cached items in for link."""
131+
...
132+
133+
def cache_path(self, link: Link) -> Path:
134+
return Path(self.get_path_for_link(link))
80135

81-
path = self.get_path_for_link(link)
82-
if os.path.isdir(path):
83-
return [(candidate, path) for candidate in os.listdir(path)]
84-
return []
136+
137+
class LinkMetadataCache(Cache):
    """Persistently store the metadata of dists found at each link."""

    def get_path_for_link(self, link: Link) -> str:
        """Return the directory in which metadata for ``link`` is stored.

        The directory is ``<cache_dir>/link-metadata/`` followed by the path
        parts computed by ``_get_cache_path_parts()`` for the link.
        """
        parts = self._get_cache_path_parts(link)
        # The path is rooted at the cache directory, so one must be set.
        assert self.cache_dir
        return os.path.join(self.cache_dir, "link-metadata", *parts)
144+
89145

146+
class WheelCacheBase(Cache):
147+
"""Specializations to the cache concept for wheels."""
148+
149+
@abc.abstractmethod
90150
def get(
91151
self,
92152
link: Link,
@@ -96,10 +156,27 @@ def get(
96156
"""Returns a link to a cached item if it exists, otherwise returns the
97157
passed link.
98158
"""
99-
raise NotImplementedError()
159+
...
160+
161+
def _can_cache(self, link: Link, canonical_package_name: str) -> bool:
162+
return bool(self.cache_dir and canonical_package_name and link)
100163

164+
def _get_candidates(
165+
self, link: Link, canonical_package_name: str
166+
) -> Iterator[Tuple[str, str]]:
167+
if not self._can_cache(link, canonical_package_name):
168+
return
169+
170+
path = self.get_path_for_link(link)
171+
if not os.path.isdir(path):
172+
return
101173

102-
class SimpleWheelCache(Cache):
174+
for candidate in os.scandir(path):
175+
if candidate.is_file():
176+
yield (candidate.name, path)
177+
178+
179+
class SimpleWheelCache(WheelCacheBase):
103180
"""A cache of wheels for future installs."""
104181

105182
def __init__(self, cache_dir: str) -> None:
@@ -131,7 +208,7 @@ def get(
131208
package_name: Optional[str],
132209
supported_tags: List[Tag],
133210
) -> Link:
134-
candidates = []
211+
candidates: List[Tuple[int, str, str]] = []
135212

136213
if not package_name:
137214
return link
@@ -205,7 +282,7 @@ def __init__(
205282
)
206283

207284

208-
class WheelCache(Cache):
285+
class WheelCache(WheelCacheBase):
209286
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache
210287
211288
This Cache allows for graceful degradation, using the ephem wheel cache
@@ -223,6 +300,15 @@ def get_path_for_link(self, link: Link) -> str:
223300
def get_ephem_path_for_link(self, link: Link) -> str:
224301
return self._ephem_cache.get_path_for_link(link)
225302

303+
def resolve_cache_dir(self, req: InstallRequirement) -> str:
    """Return the persistent or temporary cache directory where the built or
    downloaded wheel should be stored.

    The persistent directory is chosen only when a cache directory is
    configured and ``should_cache(req)`` allows it; otherwise the ephemeral
    cache directory for the link is returned.

    :param req: The requirement being built; it must have a link.
    """
    assert req.link, req
    if self.cache_dir and should_cache(req):
        return self.get_path_for_link(req.link)
    return self.get_ephem_path_for_link(req.link)
311+
226312
def get(
227313
self,
228314
link: Link,

src/pip/_internal/cli/cmdoptions.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,7 @@ def check_list_path_option(options: Values) -> None:
10081008
default=[],
10091009
choices=[
10101010
"fast-deps",
1011+
"metadata-cache",
10111012
"truststore",
10121013
]
10131014
+ ALWAYS_ENABLED_FEATURES,

src/pip/_internal/cli/req_command.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from optparse import Values
1313
from typing import TYPE_CHECKING, Any, List, Optional, Tuple
1414

15-
from pip._internal.cache import WheelCache
15+
from pip._internal.cache import LinkMetadataCache, WheelCache
1616
from pip._internal.cli import cmdoptions
1717
from pip._internal.cli.base_command import Command
1818
from pip._internal.cli.command_context import CommandContextMixIn
@@ -305,6 +305,16 @@ def make_requirement_preparer(
305305
"fast-deps has no effect when used with the legacy resolver."
306306
)
307307

308+
if options.cache_dir and "metadata-cache" in options.features_enabled:
309+
logger.warning(
310+
"pip is using a local cache for metadata information. "
311+
"This experimental feature is enabled through "
312+
"--use-feature=metadata-cache and it is not ready for "
313+
"production."
314+
)
315+
metadata_cache = LinkMetadataCache(options.cache_dir)
316+
else:
317+
metadata_cache = None
308318
return RequirementPreparer(
309319
build_dir=temp_build_dir_path,
310320
src_dir=options.src_dir,
@@ -320,6 +330,7 @@ def make_requirement_preparer(
320330
lazy_wheel=lazy_wheel,
321331
verbosity=verbosity,
322332
legacy_resolver=legacy_resolver,
333+
metadata_cache=metadata_cache,
323334
)
324335

325336
@classmethod

src/pip/_internal/exceptions.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,25 @@ def __str__(self) -> str:
250250
return f"None {self.metadata_name} metadata found for distribution: {self.dist}"
251251

252252

253+
class CacheMetadataError(PipError):
    """Raised when de/serializing a requirement into the metadata cache."""

    def __init__(
        self,
        req: "InstallRequirement",
        reason: str,
    ) -> None:
        """
        :param req: The requirement we attempted to cache.
        :param reason: Context about the precise error that occurred.
        """
        self.req = req
        self.reason = reason

    def __str__(self) -> str:
        # Combine the reason with the requirement and the link it came from.
        return f"{self.reason} for {self.req} from {self.req.link}"
270+
271+
253272
class UserInstallationInvalid(InstallationError):
254273
"""A --user install is requested on an environment without user site."""
255274

src/pip/_internal/metadata/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,14 @@
66

77
from pip._internal.utils.misc import strtobool
88

9-
from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
9+
from .base import (
10+
BaseDistribution,
11+
BaseEnvironment,
12+
FilesystemWheel,
13+
MemoryWheel,
14+
Wheel,
15+
serialize_metadata,
16+
)
1017

1118
if TYPE_CHECKING:
1219
from typing import Literal, Protocol
@@ -23,6 +30,7 @@
2330
"get_environment",
2431
"get_wheel_distribution",
2532
"select_backend",
33+
"serialize_metadata",
2634
]
2735

2836

src/pip/_internal/metadata/base.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import csv
2+
import email.generator
23
import email.message
4+
import email.policy
35
import functools
6+
import io
47
import json
58
import logging
69
import pathlib
@@ -97,6 +100,18 @@ def _convert_installed_files_path(
97100
return str(pathlib.Path(*info, *entry))
98101

99102

103+
def serialize_metadata(msg: email.message.Message) -> str:
    """Write a dist's metadata to a string.

    Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST
    directives as email headers. This function uses the more robust
    ``email.policy.EmailPolicy`` to avoid those parsing errors.

    :param msg: The metadata message to serialize.
    :return: The flattened text of the message.
    """
    out = io.StringIO()
    generator = email.generator.Generator(out, policy=email.policy.EmailPolicy())
    generator.flatten(msg)
    return out.getvalue()
113+
114+
100115
class RequiresEntry(NamedTuple):
101116
requirement: str
102117
extra: str

src/pip/_internal/network/download.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def _get_http_response_filename(resp: Response, link: Link) -> str:
113113

114114

115115
def _http_get_download(session: PipSession, link: Link) -> Response:
    """Start a streaming GET request for ``link`` and return the response.

    The URL fragment is left out of the request. ``raise_for_status()`` is
    called on the response before it is returned.
    """
    target_url = link.url_without_fragment
    resp = session.get(target_url, headers=HEADERS, stream=True)
    raise_for_status(resp)
    return resp

0 commit comments

Comments
 (0)