Skip to content

Commit b09437f

Browse files
handle metadata email parsing errors
1 parent 5a34ca3 commit b09437f

File tree

4 files changed

+56
-30
lines changed

4 files changed

+56
-30
lines changed

src/pip/_internal/exceptions.py

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,25 @@ def __str__(self) -> str:
253253
)
254254

255255

256+
class CacheMetadataError(PipError):
    """Error raised when de/serializing a requirement into the metadata cache."""

    def __init__(self, req: "InstallRequirement", reason: str) -> None:
        """Remember which requirement failed and why.

        :param req: The requirement we attempted to cache.
        :param reason: Context about the precise error that occurred.
        """
        self.reason = reason
        self.req = req

    def __str__(self) -> str:
        # The message names the failure context, the requirement, and its link.
        message = f"{self.reason} for {self.req} from {self.req.link}"
        return message
273+
274+
256275
class UserInstallationInvalid(InstallationError):
257276
"""A --user install is requested on an environment without user site."""
258277

src/pip/_internal/metadata/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,14 @@
66

77
from pip._internal.utils.misc import strtobool
88

9-
from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
9+
from .base import (
10+
BaseDistribution,
11+
BaseEnvironment,
12+
FilesystemWheel,
13+
MemoryWheel,
14+
Wheel,
15+
serialize_metadata,
16+
)
1017

1118
if TYPE_CHECKING:
1219
from typing import Literal, Protocol
@@ -23,6 +30,7 @@
2330
"get_environment",
2431
"get_wheel_distribution",
2532
"select_backend",
33+
"serialize_metadata",
2634
]
2735

2836

src/pip/_internal/metadata/base.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
import csv
2+
import email.generator
23
import email.message
4+
import email.policy
35
import functools
6+
import io
47
import json
58
import logging
69
import pathlib
@@ -97,6 +100,18 @@ def _convert_installed_files_path(
97100
return str(pathlib.Path(*info, *entry))
98101

99102

103+
def serialize_metadata(msg: email.message.Message) -> str:
    """Render a dist's metadata message as a string.

    Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST
    directives as email headers. This function flattens the message through a
    generator configured with the more robust ``email.policy.EmailPolicy`` to
    avoid those parsing errors.

    :param msg: The metadata message to serialize.
    :return: The flattened text of ``msg``.
    """
    with io.StringIO() as buffer:
        generator = email.generator.Generator(
            buffer,
            policy=email.policy.EmailPolicy(),
        )
        generator.flatten(msg)
        # Read the text out before the buffer is closed on leaving the block.
        return buffer.getvalue()
113+
114+
100115
class RequiresEntry(NamedTuple):
101116
requirement: str
102117
extra: str

src/pip/_internal/operations/prepare.py

Lines changed: 13 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
# The following comment should be removed at some point in the future.
55
# mypy: strict-optional=False
66

7-
import email.errors
87
import gzip
98
import json
109
import mimetypes
@@ -20,6 +19,7 @@
2019
from pip._internal.cache import LinkMetadataCache, should_cache
2120
from pip._internal.distributions import make_distribution_for_install_requirement
2221
from pip._internal.exceptions import (
22+
CacheMetadataError,
2323
DirectoryUrlHashUnsupported,
2424
HashMismatch,
2525
HashUnpinned,
@@ -32,6 +32,7 @@
3232
from pip._internal.metadata import (
3333
BaseDistribution,
3434
get_metadata_distribution,
35+
serialize_metadata,
3536
)
3637
from pip._internal.models.direct_url import ArchiveInfo
3738
from pip._internal.models.link import Link
@@ -230,7 +231,7 @@ class CacheableDist:
230231
def from_dist(cls, link: Link, dist: BaseDistribution) -> "CacheableDist":
231232
"""Extract the serializable data necessary to generate a metadata-only dist."""
232233
return cls(
233-
metadata=str(dist.metadata),
234+
metadata=serialize_metadata(dist.metadata),
234235
filename=Path(link.filename),
235236
canonical_name=dist.canonical_name,
236237
)
@@ -251,7 +252,7 @@ def to_json(self) -> Dict[str, Any]:
251252
}
252253

253254
@classmethod
254-
def from_json(cls, args: Dict[str, Any]) -> "CacheableDist":
255+
def from_json(cls, args: Dict[str, str]) -> "CacheableDist":
255256
return cls(
256257
metadata=args["metadata"],
257258
filename=Path(args["filename"]),
@@ -458,17 +459,10 @@ def _fetch_cached_metadata(
458459
"found cached metadata for link %s at %s", req.link, f.name
459460
)
460461
args = json.load(f)
461-
cached_dist = CacheableDist.from_json(args)
462-
return cached_dist.to_dist()
463-
except (OSError, json.JSONDecodeError, KeyError) as e:
464-
logger.exception(
465-
"error reading cached metadata for link %s at %s %s(%s)",
466-
req.link,
467-
cached_path,
468-
e.__class__.__name__,
469-
str(e),
470-
)
471-
raise
462+
cached_dist = CacheableDist.from_json(args)
463+
return cached_dist.to_dist()
464+
except Exception:
465+
raise CacheMetadataError(req, "error reading cached metadata")
472466

473467
def _cache_metadata(
474468
self,
@@ -490,23 +484,13 @@ def _cache_metadata(
490484
# containing directory for the cache file exists before writing.
491485
os.makedirs(str(cached_path.parent), exist_ok=True)
492486
try:
487+
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
488+
args = cacheable_dist.to_json()
489+
logger.debug("caching metadata for link %s at %s", req.link, cached_path)
493490
with gzip.open(cached_path, mode="wt", encoding="utf-8") as f:
494-
cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
495-
args = cacheable_dist.to_json()
496-
logger.debug("caching metadata for link %s at %s", req.link, f.name)
497491
json.dump(args, f)
498-
except (OSError, email.errors.HeaderParseError) as e:
499-
# TODO: Some dists raise email.errors.HeaderParseError when calling str() or
500-
# bytes() on the metadata, which is an email.Message. This is probably a bug
501-
# in email parsing.
502-
logger.exception(
503-
"error caching metadata for dist %s from %s: %s(%s)",
504-
metadata_dist,
505-
req.link,
506-
e.__class__.__name__,
507-
str(e),
508-
)
509-
raise
492+
except Exception:
493+
raise CacheMetadataError(req, "failed to serialize metadata")
510494

511495
def _fetch_metadata_using_link_data_attr(
512496
self,

0 commit comments

Comments
 (0)