Skip to content

Commit 778b42f

Browse files
authored
Merge pull request #12078 from pfmoore/core_metadata
Fix parsing of JSON index dist-info-metadata values
2 parents 98814e5 + a934f38 commit 778b42f

File tree

3 files changed

+159
-53
lines changed

3 files changed

+159
-53
lines changed

news/12042.bugfix.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Correctly parse ``dist-info-metadata`` values from JSON-format index data.

src/pip/_internal/models/link.py

Lines changed: 82 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -69,18 +69,6 @@ class LinkHash:
6969
def __post_init__(self) -> None:
7070
assert self.name in _SUPPORTED_HASHES
7171

72-
@classmethod
73-
def parse_pep658_hash(cls, dist_info_metadata: str) -> Optional["LinkHash"]:
74-
"""Parse a PEP 658 data-dist-info-metadata hash."""
75-
if dist_info_metadata == "true":
76-
return None
77-
name, sep, value = dist_info_metadata.partition("=")
78-
if not sep:
79-
return None
80-
if name not in _SUPPORTED_HASHES:
81-
return None
82-
return cls(name=name, value=value)
83-
8472
@classmethod
8573
@functools.lru_cache(maxsize=None)
8674
def find_hash_url_fragment(cls, url: str) -> Optional["LinkHash"]:
@@ -107,6 +95,28 @@ def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool:
10795
return hashes.is_hash_allowed(self.name, hex_digest=self.value)
10896

10997

98+
@dataclass(frozen=True)
class MetadataFile:
    """Record of a core metadata file that accompanies a distribution.

    The existence of an instance signals that such a file is available;
    ``hashes`` optionally carries the digests supplied for that file.
    """

    # Mapping of hash algorithm name -> digest for the metadata file, or None
    # when no hashes were supplied for it.
    hashes: Optional[Dict[str, str]]

    def __post_init__(self) -> None:
        # Nothing to validate when no hashes were supplied.
        if self.hashes is None:
            return
        # Internal invariant: every algorithm name must be listed in
        # _SUPPORTED_HASHES (callers are expected to filter beforehand).
        assert all(name in _SUPPORTED_HASHES for name in self.hashes)
107+
108+
109+
def supported_hashes(hashes: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
    """Return only the entries of *hashes* whose algorithm is supported.

    :param hashes: mapping of hash algorithm name to digest, or None.
    :return: a new mapping restricted to names found in ``_SUPPORTED_HASHES``,
        or None if *hashes* is None or no supported entry remains.
    """
    if hashes is None:
        return None
    usable = {
        algorithm: digest
        for algorithm, digest in hashes.items()
        if algorithm in _SUPPORTED_HASHES
    }
    # An empty result is reported as None rather than as an empty mapping.
    return usable or None
118+
119+
110120
def _clean_url_path_part(part: str) -> str:
111121
"""
112122
Clean a "part" of a URL path (i.e. after splitting on "@" characters).
@@ -179,7 +189,7 @@ class Link(KeyBasedCompareMixin):
179189
"comes_from",
180190
"requires_python",
181191
"yanked_reason",
182-
"dist_info_metadata",
192+
"metadata_file_data",
183193
"cache_link_parsing",
184194
"egg_fragment",
185195
]
@@ -190,7 +200,7 @@ def __init__(
190200
comes_from: Optional[Union[str, "IndexContent"]] = None,
191201
requires_python: Optional[str] = None,
192202
yanked_reason: Optional[str] = None,
193-
dist_info_metadata: Optional[str] = None,
203+
metadata_file_data: Optional[MetadataFile] = None,
194204
cache_link_parsing: bool = True,
195205
hashes: Optional[Mapping[str, str]] = None,
196206
) -> None:
@@ -208,18 +218,21 @@ def __init__(
208218
a simple repository HTML link. If the file has been yanked but
209219
no reason was provided, this should be the empty string. See
210220
PEP 592 for more information and the specification.
211-
:param dist_info_metadata: the metadata attached to the file, or None if no such
212-
metadata is provided. This is the value of the "data-dist-info-metadata"
213-
attribute, if present, in a simple repository HTML link. This may be parsed
214-
into its own `Link` by `self.metadata_link()`. See PEP 658 for more
215-
information and the specification.
221+
:param metadata_file_data: the metadata attached to the file, or None if
222+
no such metadata is provided. This argument, if not None, indicates
223+
that a separate metadata file exists, and also optionally supplies
224+
hashes for that file.
216225
:param cache_link_parsing: A flag that is used elsewhere to determine
217226
whether resources retrieved from this link should be cached. PyPI
218227
URLs should generally have this set to False, for example.
219228
:param hashes: A mapping of hash names to digests to allow us to
220229
determine the validity of a download.
221230
"""
222231

232+
# The comes_from, requires_python, and metadata_file_data arguments are
233+
# only used by classmethods of this class, and are not used in client
234+
# code directly.
235+
223236
# url can be a UNC windows share
224237
if url.startswith("\\\\"):
225238
url = path_to_url(url)
@@ -239,7 +252,7 @@ def __init__(
239252
self.comes_from = comes_from
240253
self.requires_python = requires_python if requires_python else None
241254
self.yanked_reason = yanked_reason
242-
self.dist_info_metadata = dist_info_metadata
255+
self.metadata_file_data = metadata_file_data
243256

244257
super().__init__(key=url, defining_class=Link)
245258

@@ -262,9 +275,25 @@ def from_json(
262275
url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url))
263276
pyrequire = file_data.get("requires-python")
264277
yanked_reason = file_data.get("yanked")
265-
dist_info_metadata = file_data.get("dist-info-metadata")
266278
hashes = file_data.get("hashes", {})
267279

280+
# PEP 714: Indexes must use the name core-metadata, but
281+
# clients should support the old name as a fallback for compatibility.
282+
metadata_info = file_data.get("core-metadata")
283+
if metadata_info is None:
284+
metadata_info = file_data.get("dist-info-metadata")
285+
286+
# The metadata info value may be a boolean, or a dict of hashes.
287+
if isinstance(metadata_info, dict):
288+
# The file exists, and hashes have been supplied
289+
metadata_file_data = MetadataFile(supported_hashes(metadata_info))
290+
elif metadata_info:
291+
# The file exists, but there are no hashes
292+
metadata_file_data = MetadataFile(None)
293+
else:
294+
# False or not present: the file does not exist
295+
metadata_file_data = None
296+
268297
# The Link.yanked_reason expects an empty string instead of a boolean.
269298
if yanked_reason and not isinstance(yanked_reason, str):
270299
yanked_reason = ""
@@ -278,7 +307,7 @@ def from_json(
278307
requires_python=pyrequire,
279308
yanked_reason=yanked_reason,
280309
hashes=hashes,
281-
dist_info_metadata=dist_info_metadata,
310+
metadata_file_data=metadata_file_data,
282311
)
283312

284313
@classmethod
@@ -298,14 +327,39 @@ def from_element(
298327
url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href))
299328
pyrequire = anchor_attribs.get("data-requires-python")
300329
yanked_reason = anchor_attribs.get("data-yanked")
301-
dist_info_metadata = anchor_attribs.get("data-dist-info-metadata")
330+
331+
# PEP 714: Indexes must use the name data-core-metadata, but
332+
# clients should support the old name as a fallback for compatibility.
333+
metadata_info = anchor_attribs.get("data-core-metadata")
334+
if metadata_info is None:
335+
metadata_info = anchor_attribs.get("data-dist-info-metadata")
336+
# The metadata info value may be the string "true", or a string of
337+
# the form "hashname=hashval"
338+
if metadata_info == "true":
339+
# The file exists, but there are no hashes
340+
metadata_file_data = MetadataFile(None)
341+
elif metadata_info is None:
342+
# The file does not exist
343+
metadata_file_data = None
344+
else:
345+
# The file exists, and hashes have been supplied
346+
hashname, sep, hashval = metadata_info.partition("=")
347+
if sep == "=":
348+
metadata_file_data = MetadataFile(supported_hashes({hashname: hashval}))
349+
else:
350+
# Error - data is wrong. Treat as no hashes supplied.
351+
logger.debug(
352+
"Index returned invalid data-dist-info-metadata value: %s",
353+
metadata_info,
354+
)
355+
metadata_file_data = MetadataFile(None)
302356

303357
return cls(
304358
url,
305359
comes_from=page_url,
306360
requires_python=pyrequire,
307361
yanked_reason=yanked_reason,
308-
dist_info_metadata=dist_info_metadata,
362+
metadata_file_data=metadata_file_data,
309363
)
310364

311365
def __str__(self) -> str:
@@ -407,17 +461,13 @@ def subdirectory_fragment(self) -> Optional[str]:
407461
return match.group(1)
408462

409463
def metadata_link(self) -> Optional["Link"]:
410-
"""Implementation of PEP 658 parsing."""
411-
# Note that Link.from_element() parsing the "data-dist-info-metadata" attribute
412-
# from an HTML anchor tag is typically how the Link.dist_info_metadata attribute
413-
# gets set.
414-
if self.dist_info_metadata is None:
464+
"""Return a link to the associated core metadata file (if any)."""
465+
if self.metadata_file_data is None:
415466
return None
416467
metadata_url = f"{self.url_without_fragment}.metadata"
417-
metadata_link_hash = LinkHash.parse_pep658_hash(self.dist_info_metadata)
418-
if metadata_link_hash is None:
468+
if self.metadata_file_data.hashes is None:
419469
return Link(metadata_url)
420-
return Link(metadata_url, hashes=metadata_link_hash.as_dict())
470+
return Link(metadata_url, hashes=self.metadata_file_data.hashes)
421471

422472
def as_hashes(self) -> Hashes:
423473
return Hashes({k: [v] for k, v in self._hashes.items()})

tests/unit/test_collector.py

Lines changed: 76 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pip._internal.models.link import (
3131
Link,
3232
LinkHash,
33+
MetadataFile,
3334
_clean_url_path,
3435
_ensure_quoted_url,
3536
)
@@ -485,13 +486,30 @@ def test_parse_links_json() -> None:
485486
"requires-python": ">=3.7",
486487
"dist-info-metadata": False,
487488
},
488-
# Same as above, but parsing dist-info-metadata.
489+
# Same as above, but parsing core-metadata.
489490
{
490491
"filename": "holygrail-1.0-py3-none-any.whl",
491492
"url": "/files/holygrail-1.0-py3-none-any.whl",
492493
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
493494
"requires-python": ">=3.7",
494-
"dist-info-metadata": "sha512=aabdd41",
495+
"core-metadata": {"sha512": "aabdd41"},
496+
},
497+
# Ensure fallback to dist-info-metadata works
498+
{
499+
"filename": "holygrail-1.0-py3-none-any.whl",
500+
"url": "/files/holygrail-1.0-py3-none-any.whl",
501+
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
502+
"requires-python": ">=3.7",
503+
"dist-info-metadata": {"sha512": "aabdd41"},
504+
},
505+
# Ensure that core-metadata gets priority.
506+
{
507+
"filename": "holygrail-1.0-py3-none-any.whl",
508+
"url": "/files/holygrail-1.0-py3-none-any.whl",
509+
"hashes": {"sha256": "sha256 hash", "blake2b": "blake2b hash"},
510+
"requires-python": ">=3.7",
511+
"core-metadata": {"sha512": "aabdd41"},
512+
"dist-info-metadata": {"sha512": "this_is_wrong"},
495513
},
496514
],
497515
}
@@ -527,7 +545,23 @@ def test_parse_links_json() -> None:
527545
requires_python=">=3.7",
528546
yanked_reason=None,
529547
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
530-
dist_info_metadata="sha512=aabdd41",
548+
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
549+
),
550+
Link(
551+
"https://example.com/files/holygrail-1.0-py3-none-any.whl",
552+
comes_from=page.url,
553+
requires_python=">=3.7",
554+
yanked_reason=None,
555+
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
556+
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
557+
),
558+
Link(
559+
"https://example.com/files/holygrail-1.0-py3-none-any.whl",
560+
comes_from=page.url,
561+
requires_python=">=3.7",
562+
yanked_reason=None,
563+
hashes={"sha256": "sha256 hash", "blake2b": "blake2b hash"},
564+
metadata_file_data=MetadataFile({"sha512": "aabdd41"}),
531565
),
532566
]
533567

@@ -585,30 +619,42 @@ def test_parse_links__yanked_reason(anchor_html: str, expected: Optional[str]) -
585619
),
586620
# Test with value "true".
587621
(
588-
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="true"></a>',
589-
"true",
622+
'<a href="/pkg1-1.0.tar.gz" data-core-metadata="true"></a>',
623+
MetadataFile(None),
590624
{},
591625
),
592626
# Test with a provided hash value.
593627
(
594-
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
595-
"sha256=aa113592bbe",
628+
'<a href="/pkg1-1.0.tar.gz" data-core-metadata="sha256=aa113592bbe"></a>', # noqa: E501
629+
MetadataFile({"sha256": "aa113592bbe"}),
596630
{},
597631
),
598632
# Test with a provided hash value for both the requirement as well as metadata.
599633
(
600-
'<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
601-
"sha256=aa113592bbe",
634+
'<a href="/pkg1-1.0.tar.gz#sha512=abc132409cb" data-core-metadata="sha256=aa113592bbe"></a>', # noqa: E501
635+
MetadataFile({"sha256": "aa113592bbe"}),
602636
{"sha512": "abc132409cb"},
603637
),
638+
# Ensure the fallback to the old name works.
639+
(
640+
'<a href="/pkg1-1.0.tar.gz" data-dist-info-metadata="sha256=aa113592bbe"></a>', # noqa: E501
641+
MetadataFile({"sha256": "aa113592bbe"}),
642+
{},
643+
),
644+
# Ensure that the data-core-metadata name gets priority.
645+
(
646+
'<a href="/pkg1-1.0.tar.gz" data-core-metadata="sha256=aa113592bbe" data-dist-info-metadata="sha256=invalid_value"></a>', # noqa: E501
647+
MetadataFile({"sha256": "aa113592bbe"}),
648+
{},
649+
),
604650
],
605651
)
606-
def test_parse_links__dist_info_metadata(
652+
def test_parse_links__metadata_file_data(
607653
anchor_html: str,
608654
expected: Optional[str],
609655
hashes: Dict[str, str],
610656
) -> None:
611-
link = _test_parse_links_data_attribute(anchor_html, "dist_info_metadata", expected)
657+
link = _test_parse_links_data_attribute(anchor_html, "metadata_file_data", expected)
612658
assert link._hashes == hashes
613659

614660

@@ -1080,17 +1126,26 @@ def test_link_hash_parsing(url: str, result: Optional[LinkHash]) -> None:
10801126

10811127

10821128
@pytest.mark.parametrize(
1083-
"dist_info_metadata, result",
1129+
"metadata_attrib, expected",
10841130
[
1085-
("sha256=aa113592bbe", LinkHash("sha256", "aa113592bbe")),
1086-
("sha256=", LinkHash("sha256", "")),
1087-
("sha500=aa113592bbe", None),
1088-
("true", None),
1089-
("", None),
1090-
("aa113592bbe", None),
1131+
("sha256=aa113592bbe", MetadataFile({"sha256": "aa113592bbe"})),
1132+
("sha256=", MetadataFile({"sha256": ""})),
1133+
("sha500=aa113592bbe", MetadataFile(None)),
1134+
("true", MetadataFile(None)),
1135+
(None, None),
1136+
# Attribute is present but invalid
1137+
("", MetadataFile(None)),
1138+
("aa113592bbe", MetadataFile(None)),
10911139
],
10921140
)
1093-
def test_pep658_hash_parsing(
1094-
dist_info_metadata: str, result: Optional[LinkHash]
1141+
def test_metadata_file_info_parsing_html(
1142+
metadata_attrib: str, expected: Optional[MetadataFile]
10951143
) -> None:
1096-
assert LinkHash.parse_pep658_hash(dist_info_metadata) == result
1144+
attribs: Dict[str, Optional[str]] = {
1145+
"href": "something",
1146+
"data-dist-info-metadata": metadata_attrib,
1147+
}
1148+
page_url = "dummy_for_comes_from"
1149+
base_url = "https://index.url/simple"
1150+
link = Link.from_element(attribs, page_url, base_url)
1151+
assert link is not None and link.metadata_file_data == expected

0 commit comments

Comments
 (0)