Skip to content

Commit 7ed66b2

Browse files
authored
Merge pull request #7431 from chrahunt/refactor/operations-prepare-2
Move operations.prepare.Downloader (and friends) to network.download.Downloader
2 parents 9ccbfce + 78a221c commit 7ed66b2

File tree

7 files changed

+356
-327
lines changed

7 files changed

+356
-327
lines changed

src/pip/_internal/models/link.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def hash_name(self):
180180

181181
@property
def show_url(self):
    # type: () -> str
    """Return the last path component of the URL.

    Anything from the first '#' onwards, and then anything from the first
    '?' onwards, is discarded before the basename is taken.
    """
    without_fragment = self._url.split('#', 1)[0]
    without_query = without_fragment.split('?', 1)[0]
    return posixpath.basename(without_query)
185185

186186
@property

src/pip/_internal/network/download.py

Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
"""Download files with progress indicators.
2+
"""
3+
import cgi
4+
import logging
5+
import mimetypes
6+
import os
7+
8+
from pip._vendor import requests
9+
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE
10+
11+
from pip._internal.models.index import PyPI
12+
from pip._internal.network.cache import is_from_cache
13+
from pip._internal.network.utils import response_chunks
14+
from pip._internal.utils.misc import (
15+
format_size,
16+
redact_auth_from_url,
17+
splitext,
18+
)
19+
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
20+
from pip._internal.utils.ui import DownloadProgressProvider
21+
22+
if MYPY_CHECK_RUNNING:
23+
from typing import Iterable, Optional
24+
25+
from pip._vendor.requests.models import Response
26+
27+
from pip._internal.models.link import Link
28+
from pip._internal.network.session import PipSession
29+
30+
logger = logging.getLogger(__name__)
31+
32+
33+
def _get_http_response_size(resp):
    # type: (Response) -> Optional[int]
    """Return the response's Content-Length header as an int.

    Returns None when the header is absent or its value cannot be
    converted to an integer.
    """
    try:
        declared_length = resp.headers['content-length']
        return int(declared_length)
    except (KeyError, TypeError, ValueError):
        return None
39+
40+
41+
def _prepare_download(
    resp,  # type: Response
    link,  # type: Link
    progress_bar  # type: str
):
    # type: (...) -> Iterable[bytes]
    """Log the start of a download and return an iterable of its chunks.

    The URL is logged with auth details redacted and, when the response
    declares a non-zero length, with that size appended. The returned
    chunks are wrapped in a progress indicator only when INFO logging is
    enabled, the response did not come from the cache, and the length is
    either unknown or greater than 40 * 1000.
    """
    size = _get_http_response_size(resp)

    # Files on PyPI's own file storage host are shown by their short name.
    url = (
        link.show_url
        if link.netloc == PyPI.file_storage_domain
        else link.url_without_fragment
    )

    description = redact_auth_from_url(url)
    if size:
        description = '{} ({})'.format(description, format_size(size))

    if not is_from_cache(resp):
        logger.info("Downloading %s", description)
    else:
        logger.info("Using cached %s", description)

    # Short-circuits in the same order as the checks are listed above.
    show_progress = (
        logger.getEffectiveLevel() <= logging.INFO
        and not is_from_cache(resp)
        and (not size or size > (40 * 1000))
    )

    chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
    if show_progress:
        return DownloadProgressProvider(progress_bar, max=size)(chunks)
    return chunks
83+
84+
85+
def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Reduce a Content-Disposition "filename" value to its final path
    component, dropping any directory parts.
    """
    _directory, final_component = os.path.split(filename)
    return final_component
91+
92+
93+
def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Extract the "filename" parameter from a Content-Disposition header
    value. The default filename is returned when the parameter is missing,
    empty, or empty after sanitizing.
    """
    _disposition, params = cgi.parse_header(content_disposition)
    raw_name = params.get('filename')
    if not raw_name:
        return default_filename
    # Sanitize the name to prevent directory traversal in case it
    # contains ".." path parts.
    return sanitize_content_filename(raw_name) or default_filename
106+
107+
108+
def _get_http_response_filename(resp, link):
    # type: (Response, Link) -> str
    """Get an ideal filename from the given HTTP response, falling back to
    the link filename if not provided.

    The name comes from the Content-Disposition header when that header
    supplies one. If the resulting name has no extension, an extension is
    guessed from the Content-Type header; failing that, the extension of
    the response URL is used when that URL differs from the link URL.
    """
    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        filename = parse_content_disposition(content_disposition, filename)
    ext = splitext(filename)[1]  # type: Optional[str]
    if not ext:
        # A Content-Type value may carry parameters, e.g.
        # "text/plain; charset=utf-8". mimetypes only matches the bare
        # media type, so drop everything from the first ';' before guessing.
        content_type = resp.headers.get('content-type', '')
        media_type = content_type.split(';', 1)[0].strip()
        ext = mimetypes.guess_extension(media_type)
        if ext:
            filename += ext
    # Last resort: borrow the extension from the URL the response actually
    # came from (presumably it differs from the link URL after a redirect).
    if not ext and link.url != resp.url:
        ext = os.path.splitext(resp.url)[1]
        if ext:
            filename += ext
    return filename
130+
131+
132+
def _http_get_download(session, link):
    # type: (PipSession, Link) -> Response
    """Start a streaming GET for the link's URL and return the response.

    The URL fragment is removed before the request is made. After the
    request, ``raise_for_status()`` is called on the response, so any
    exception it raises propagates to the caller.
    """
    target_url, _sep, _fragment = link.url.partition('#')

    # Ask for the identity encoding explicitly, because requests accepts
    # compressed responses by default and servers handle that in
    # different ways:
    #   - some see that the file isn't compressible, leave it alone and
    #     send an empty Content-Encoding;
    #   - some see that the file is already compressed, leave it alone
    #     and add a Content-Encoding: gzip header;
    #   - some notice nothing at all, compress the already-compressed
    #     file again and set Content-Encoding: gzip.
    # Requesting only the identity encoding is meant to eliminate the
    # third case. Hopefully no server reacts to this by decompressing an
    # already-compressed file before sending it; if one does, there is
    # probably no way to make this work.
    request_headers = {"Accept-Encoding": "identity"}

    resp = session.get(target_url, headers=request_headers, stream=True)
    resp.raise_for_status()
    return resp
161+
162+
163+
class Download(object):
    """Plain container bundling a response with its filename and chunks."""

    def __init__(
        self,
        response,  # type: Response
        filename,  # type: str
        chunks,  # type: Iterable[bytes]
    ):
        # type: (...) -> None
        # The three values are stored unchanged as public attributes.
        self.chunks = chunks
        self.filename = filename
        self.response = response
174+
175+
176+
class Downloader(object):
    """Callable that fetches a link and returns the result as a Download."""

    def __init__(
        self,
        session,  # type: PipSession
        progress_bar,  # type: str
    ):
        # type: (...) -> None
        self._progress_bar = progress_bar
        self._session = session

    def __call__(self, link):
        # type: (Link) -> Download
        """Request the link and build a Download from the response.

        An HTTP error is logged at CRITICAL level with its status code
        and then re-raised unchanged.
        """
        try:
            resp = _http_get_download(self._session, link)
        except requests.HTTPError as exc:
            status_code = exc.response.status_code
            logger.critical(
                "HTTP error %s while getting %s", status_code, link
            )
            raise

        filename = _get_http_response_filename(resp, link)
        chunks = _prepare_download(resp, link, self._progress_bar)
        return Download(resp, filename, chunks)

0 commit comments

Comments
 (0)