Commit 8c755e8
add PEP 658 support!!!
- move url cleaning to link.py
- use a nice dataclass to decouple hash parsing from Link
- avoid downloading wheels when testing the resolver in isolation
- avoid special-casing the python version requirement in download.py
- streamline the RequirementSetWithCandidates invocation
- restore _clean_link method from collector.py to pass tests
Parent: d4ccc39

File tree: 12 files changed, +400/-174 lines
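For background, PEP 658 lets a simple-index page advertise a standalone metadata file for each distribution through a data-dist-info-metadata attribute on the file's anchor tag: the metadata lives at the file URL plus a ".metadata" suffix, and the attribute value is either the bare string "true" or a "hashname=hashvalue" pair. A minimal standalone sketch of those semantics (the function and all values below are illustrative, not code from this commit):

# Illustrative sketch of the PEP 658 attribute semantics (not commit code).
from typing import Optional, Tuple

def parse_dist_info_metadata(
    file_url: str, attr: Optional[str]
) -> Optional[Tuple[str, Optional[Tuple[str, str]]]]:
    """Return (metadata_url, (hash_name, hash_value)) or None."""
    if attr is None:
        return None  # the index serves no separate metadata file
    metadata_url = f"{file_url}.metadata"
    if attr == "true":
        return metadata_url, None  # metadata exists, but no hash is given
    name, _, value = attr.partition("=")
    return metadata_url, (name, value)

# e.g. <a href="..." data-dist-info-metadata="sha256=deadbeef">:
print(parse_dist_info_metadata("https://example.org/pkg-1.0.whl", "sha256=deadbeef"))
# -> ('https://example.org/pkg-1.0.whl.metadata', ('sha256', 'deadbeef'))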

src/pip/_internal/commands/download.py

Lines changed: 112 additions & 11 deletions
@@ -1,20 +1,99 @@
 import json
 import logging
 import os
+from dataclasses import dataclass, field
 from optparse import Values
-from typing import Dict, List
+from typing import Any, Dict, List, Optional, Union
+
+from pip._vendor.packaging.requirements import Requirement
 
 from pip._internal.cli import cmdoptions
 from pip._internal.cli.cmdoptions import make_target_python
 from pip._internal.cli.req_command import RequirementCommand, with_cleanup
 from pip._internal.cli.status_codes import SUCCESS
+from pip._internal.models.link import Link, LinkHash
 from pip._internal.req.req_tracker import get_requirement_tracker
+from pip._internal.resolution.base import RequirementSetWithCandidates
 from pip._internal.utils.misc import ensure_dir, normalize_path, write_output
 from pip._internal.utils.temp_dir import TempDirectory
 
 logger = logging.getLogger(__name__)
 
 
+@dataclass(frozen=True)
+class DistInfoMetadata:
+    """???/From PEP 658"""
+
+    metadata_url: str
+    metadata_hash: Optional[LinkHash]
+
+    @classmethod
+    def from_link(cls, link: Link) -> Optional["DistInfoMetadata"]:
+        if link.dist_info_metadata is None:
+            return None
+
+        metadata_url = f"{link.url_without_fragment}.metadata"
+        if link.dist_info_metadata == "true":
+            metadata_hash = None
+        else:
+            metadata_hash = LinkHash.split_hash_name_and_value(link.dist_info_metadata)
+
+        return cls(metadata_url=metadata_url, metadata_hash=metadata_hash)
+
+    def as_json(self) -> Dict[str, Union[str, Optional[Dict[str, str]]]]:
+        return {
+            "metadata_url": self.metadata_url,
+            "metadata_hash": (
+                self.metadata_hash.as_json() if self.metadata_hash else None
+            ),
+        }
+
+
+@dataclass(frozen=True)
+class RequirementDownloadInfo:
+    req: Requirement
+    url: str
+    file_hash: Optional[LinkHash]
+    dist_info_metadata: Optional[DistInfoMetadata]
+
+    @classmethod
+    def from_req_and_link(
+        cls,
+        req: Requirement,
+        link: Link,
+    ) -> "RequirementDownloadInfo":
+        return cls(
+            req=req,
+            url=link.url,
+            file_hash=link.get_link_hash(),
+            dist_info_metadata=DistInfoMetadata.from_link(link),
+        )
+
+    def as_json(self) -> Dict[str, Any]:
+        return {
+            "req": str(self.req),
+            "url": self.url,
+            "hash": self.file_hash and self.file_hash.as_json(),
+            "dist_info_metadata": (
+                self.dist_info_metadata and self.dist_info_metadata.as_json()
+            ),
+        }
+
+
+@dataclass
+class DownloadInfos:
+    implicit_requirements: List[Requirement] = field(default_factory=list)
+    resolution: Dict[str, RequirementDownloadInfo] = field(default_factory=dict)
+
+    def as_json(self) -> Dict[str, Any]:
+        return {
+            "implicit_requirements": [str(req) for req in self.implicit_requirements],
+            "resolution": {
+                name: info.as_json() for name, info in self.resolution.items()
+            },
+        }
+
+
 class DownloadCommand(RequirementCommand):
     """
     Download packages from:
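The JSON these dataclasses emit looks roughly like the following. This is a schematic example: the values are invented, and the exact key layout of LinkHash.as_json() is defined in link.py, which is not part of this excerpt.

# Schematic as_json() output (invented values; LinkHash.as_json() layout assumed).
dist_info_metadata = {
    "metadata_url": "https://example.org/pkg-1.0.whl.metadata",
    "metadata_hash": {"sha256": "deadbeef"},  # None when the attribute was "true"
}
requirement_download_info = {
    "req": "pkg==1.0",
    "url": "https://example.org/pkg-1.0.whl",
    "hash": {"sha256": "cafebabe"},  # None when the link carries no fragment hash
    "dist_info_metadata": dist_info_metadata,  # None when PEP 658 info is absent
}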
@@ -149,24 +228,46 @@ def run(self, options: Values, args: List[str]) -> int:
         requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
 
         downloaded: List[str] = []
-        download_infos: List[Dict[str, str]] = []
         for req in requirement_set.requirements.values():
+            # If this distribution was not already satisfied, that means we
+            # downloaded it.
             if req.satisfied_by is None:
-                assert req.name is not None
-                assert req.link is not None
-                download_infos.append(
-                    {
-                        "name": req.name,
-                        "url": req.link.url,
-                    }
-                )
                 preparer.save_linked_requirement(req)
+                assert req.name is not None
                 downloaded.append(req.name)
 
+        download_infos = DownloadInfos()
+        if options.print_download_urls:
+            if isinstance(requirement_set, RequirementSetWithCandidates):
+                for candidate in requirement_set.candidates.mapping.values():
+                    # This will occur for the python version requirement, for example.
+                    if candidate.name not in requirement_set.requirements:
+                        download_infos.implicit_requirements.append(
+                            candidate.as_serializable_requirement()
+                        )
+                        continue
+                    req = requirement_set.requirements[candidate.name]
+                    assert req.name is not None
+                    assert req.link is not None
+                    assert req.name not in download_infos.resolution
+                    download_infos.resolution[
+                        req.name
+                    ] = RequirementDownloadInfo.from_req_and_link(
+                        req=candidate.as_serializable_requirement(),
+                        link=req.link,
+                    )
+            else:
+                logger.warning(
+                    "--print-download-urls is being used with the legacy resolver. "
+                    "The legacy resolver does not retain detailed dependency "
+                    "information, so all the fields in the output JSON file "
+                    "will be empty."
+                )
+
         if downloaded:
             write_output("Successfully downloaded %s", " ".join(downloaded))
         if options.print_download_urls:
             with open(options.print_download_urls, "w") as f:
-                json.dump(download_infos, f, indent=4)
+                json.dump(download_infos.as_json(), f, indent=4)
 
         return SUCCESS
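Taken together, the file written by --print-download-urls changes from a flat list of name/url pairs to a structured document shaped like this (a sketch with invented values; DownloadInfos.as_json() above defines the two top-level keys):

# Sketch of the dumped JSON document (invented package names and URLs).
expected_output_shape = {
    "implicit_requirements": ["python==3.9.10"],  # e.g. the python version requirement
    "resolution": {
        "pkg": {
            "req": "pkg==1.0",
            "url": "https://example.org/pkg-1.0.whl",
            "hash": None,
            "dist_info_metadata": None,
        },
    },
}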

src/pip/_internal/index/collector.py

Lines changed: 3 additions & 98 deletions
@@ -8,10 +8,8 @@
 import itertools
 import logging
 import os
-import re
 import urllib.parse
 import urllib.request
-import xml.etree.ElementTree
 from optparse import Values
 from typing import (
     Callable,
@@ -29,19 +27,18 @@
 from pip._vendor.requests.exceptions import RetryError, SSLError
 
 from pip._internal.exceptions import NetworkConnectionError
-from pip._internal.models.link import Link
+from pip._internal.models.link import HTMLElement, Link
 from pip._internal.models.search_scope import SearchScope
 from pip._internal.network.session import PipSession
 from pip._internal.network.utils import raise_for_status
 from pip._internal.utils.filetypes import is_archive_file
-from pip._internal.utils.misc import pairwise, redact_auth_from_url
+from pip._internal.utils.misc import redact_auth_from_url
 from pip._internal.vcs import vcs
 
 from .sources import CandidatesFromPage, LinkSource, build_source
 
 logger = logging.getLogger(__name__)
 
-HTMLElement = xml.etree.ElementTree.Element
 ResponseHeaders = MutableMapping[str, str]
 
 
@@ -171,94 +168,6 @@ def _determine_base_url(document: HTMLElement, page_url: str) -> str:
     return page_url
 
 
-def _clean_url_path_part(part: str) -> str:
-    """
-    Clean a "part" of a URL path (i.e. after splitting on "@" characters).
-    """
-    # We unquote prior to quoting to make sure nothing is double quoted.
-    return urllib.parse.quote(urllib.parse.unquote(part))
-
-
-def _clean_file_url_path(part: str) -> str:
-    """
-    Clean the first part of a URL path that corresponds to a local
-    filesystem path (i.e. the first part after splitting on "@" characters).
-    """
-    # We unquote prior to quoting to make sure nothing is double quoted.
-    # Also, on Windows the path part might contain a drive letter which
-    # should not be quoted. On Linux where drive letters do not
-    # exist, the colon should be quoted. We rely on urllib.request
-    # to do the right thing here.
-    return urllib.request.pathname2url(urllib.request.url2pathname(part))
-
-
-# percent-encoded: /
-_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE)
-
-
-def _clean_url_path(path: str, is_local_path: bool) -> str:
-    """
-    Clean the path portion of a URL.
-    """
-    if is_local_path:
-        clean_func = _clean_file_url_path
-    else:
-        clean_func = _clean_url_path_part
-
-    # Split on the reserved characters prior to cleaning so that
-    # revision strings in VCS URLs are properly preserved.
-    parts = _reserved_chars_re.split(path)
-
-    cleaned_parts = []
-    for to_clean, reserved in pairwise(itertools.chain(parts, [""])):
-        cleaned_parts.append(clean_func(to_clean))
-        # Normalize %xx escapes (e.g. %2f -> %2F)
-        cleaned_parts.append(reserved.upper())
-
-    return "".join(cleaned_parts)
-
-
-def _clean_link(url: str) -> str:
-    """
-    Make sure a link is fully quoted.
-    For example, if ' ' occurs in the URL, it will be replaced with "%20",
-    and without double-quoting other characters.
-    """
-    # Split the URL into parts according to the general structure
-    # `scheme://netloc/path;parameters?query#fragment`.
-    result = urllib.parse.urlparse(url)
-    # If the netloc is empty, then the URL refers to a local filesystem path.
-    is_local_path = not result.netloc
-    path = _clean_url_path(result.path, is_local_path=is_local_path)
-    return urllib.parse.urlunparse(result._replace(path=path))
-
-
-def _create_link_from_element(
-    anchor: HTMLElement,
-    page_url: str,
-    base_url: str,
-) -> Optional[Link]:
-    """
-    Convert an anchor element in a simple repository page to a Link.
-    """
-    href = anchor.get("href")
-    if not href:
-        return None
-
-    url = _clean_link(urllib.parse.urljoin(base_url, href))
-    pyrequire = anchor.get("data-requires-python")
-    yanked_reason = anchor.get("data-yanked")
-
-    link = Link(
-        url,
-        comes_from=page_url,
-        requires_python=pyrequire,
-        yanked_reason=yanked_reason,
-    )
-
-    return link
-
-
 class CacheablePageContent:
     def __init__(self, page: "HTMLPage") -> None:
         assert page.cache_link_parsing
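Per the commit message, these helpers moved to link.py rather than disappearing. The core trick they implement is the unquote-then-quote round trip, which percent-encodes unsafe characters without double-encoding ones that are already escaped; a standalone sketch:

# Standalone sketch of the unquote-then-quote idea from _clean_url_path_part.
import urllib.parse

def clean_part(part: str) -> str:
    # Unquote first so already-encoded input is not encoded a second time.
    return urllib.parse.quote(urllib.parse.unquote(part))

print(clean_part("some wheel.whl"))    # -> 'some%20wheel.whl'
print(clean_part("some%20wheel.whl"))  # -> 'some%20wheel.whl' (idempotent)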
@@ -307,11 +216,7 @@ def parse_links(page: "HTMLPage") -> Iterable[Link]:
     url = page.url
     base_url = _determine_base_url(document, url)
     for anchor in document.findall(".//a"):
-        link = _create_link_from_element(
-            anchor,
-            page_url=url,
-            base_url=base_url,
-        )
+        link = Link.from_element(anchor, page_url=url, base_url=base_url)
         if link is None:
             continue
         yield link
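The replacement Link.from_element classmethod is defined in link.py (outside this excerpt); from its call site here, usage looks like the sketch below, where the anchor markup and URLs are invented:

# Hypothetical use of Link.from_element (defined in link.py, not shown here).
import xml.etree.ElementTree as ET

from pip._internal.models.link import Link

anchor = ET.fromstring(
    '<a href="pkg-1.0.whl#sha256=deadbeef" data-requires-python="&gt;=3.7">'
    "pkg-1.0.whl</a>"
)
link = Link.from_element(
    anchor,
    page_url="https://example.org/simple/pkg/",
    base_url="https://example.org/simple/pkg/",
)
# from_element returns None for anchors without an href, hence the
# `if link is None: continue` guard in parse_links above.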

src/pip/_internal/metadata/base.py

Lines changed: 3 additions & 0 deletions
@@ -101,6 +101,9 @@ def __repr__(self) -> str:
     def __str__(self) -> str:
         return f"{self.raw_name} {self.version}"
 
+    def as_serializable_requirement(self) -> Requirement:
+        raise NotImplementedError()
+
     @property
     def location(self) -> Optional[str]:
         """Where the distribution is loaded from.

src/pip/_internal/metadata/pkg_resources.py

Lines changed: 3 additions & 0 deletions
@@ -120,6 +120,9 @@ def from_wheel(cls, wheel: Wheel, name: str) -> "Distribution":
         )
         return cls(dist)
 
+    def as_serializable_requirement(self) -> Requirement:
+        return self._dist.as_requirement()
+
     @property
     def location(self) -> Optional[str]:
         return self._dist.location
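Here the pkg_resources backend just defers to setuptools' Distribution.as_requirement(), which pins the installed name to its version. A quick sanity check of that underlying API (output shown for an arbitrary installed distribution):

# Demonstrates the pkg_resources API this delegates to (any installed dist works).
import pkg_resources

dist = pkg_resources.get_distribution("pip")
print(dist.as_requirement())  # e.g. pip==21.3.1 -- name pinned to installed version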
