Skip to content

Commit ed16eb3

Browse files
cdelerflorimondmancatomchristie
authored
Add progress to streaming download (#1268)
* Added last_raw_chunk_size to the Response object (#1208) * Added example with progress bar (#1208) Co-authored-by: Florimond Manca <[email protected]> * Apply suggestions from code review Co-authored-by: Florimond Manca <[email protected]> * PR review Changed last_raw_chunk_size to num_bytes_downloaded ; Edited the example according to documentation * Update docs/advanced.md Co-authored-by: Florimond Manca <[email protected]> * Update docs/advanced.md Co-authored-by: Florimond Manca <[email protected]> * Update docs/advanced.md * Update docs/advanced.md Co-authored-by: Florimond Manca <[email protected]> Co-authored-by: Tom Christie <[email protected]>
1 parent 4d950e5 commit ed16eb3

File tree

3 files changed

+67
-0
lines changed

3 files changed

+67
-0
lines changed

docs/advanced.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,34 @@ with httpx.Client(headers=headers) as client:
221221
...
222222
```
223223

224+
## Monitoring download progress
225+
226+
If you need to monitor the download progress of large responses, you can use response streaming and inspect the `response.num_bytes_downloaded` property.
227+
228+
This interface is required for properly determining download progress, because the total number of bytes returned by `response.content` or `response.iter_bytes()` will not always correspond to the raw content length of the response if HTTP response compression is being used.
229+
230+
For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this…
231+
232+
```python
233+
import tempfile
234+
235+
import httpx
236+
from tqdm import tqdm
237+
238+
with tempfile.NamedTemporaryFile() as download_file:
239+
url = "https://speed.hetzner.de/100MB.bin"
240+
with httpx.stream("GET", url) as response:
241+
total = int(response.headers["Content-Length"])
242+
243+
with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress:
244+
num_bytes_downloaded = response.num_bytes_downloaded
245+
for chunk in response.iter_bytes():
246+
download_file.write(chunk)
247+
progress.update(response.num_bytes_downloaded - num_bytes_downloaded)
248+
num_bytes_downloaded = response.num_bytes_downloaded
249+
print(f"The total download size is {response.num_bytes_downloaded} bytes")
250+
```
251+
224252
## .netrc Support
225253

226254
HTTPX supports the .netrc file. In `trust_env=True` cases, if the auth parameter is

httpx/_models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,8 @@ def __init__(
697697
self._raw_stream = ByteStream(body=content or b"")
698698
self.read()
699699

700+
self._num_bytes_downloaded = 0
701+
700702
@property
701703
def elapsed(self) -> datetime.timedelta:
702704
"""
@@ -885,6 +887,10 @@ def links(self) -> typing.Dict[typing.Optional[str], typing.Dict[str, str]]:
885887
ldict[key] = link
886888
return ldict
887889

890+
@property
891+
def num_bytes_downloaded(self) -> int:
892+
return self._num_bytes_downloaded
893+
888894
def __repr__(self) -> str:
889895
return f"<Response [{self.status_code} {self.reason_phrase}]>"
890896

@@ -951,8 +957,10 @@ def iter_raw(self) -> typing.Iterator[bytes]:
951957
raise ResponseClosed()
952958

953959
self.is_stream_consumed = True
960+
self._num_bytes_downloaded = 0
954961
with map_exceptions(HTTPCORE_EXC_MAP, request=self._request):
955962
for part in self._raw_stream:
963+
self._num_bytes_downloaded += len(part)
956964
yield part
957965
self.close()
958966

@@ -1032,8 +1040,10 @@ async def aiter_raw(self) -> typing.AsyncIterator[bytes]:
10321040
raise ResponseClosed()
10331041

10341042
self.is_stream_consumed = True
1043+
self._num_bytes_downloaded = 0
10351044
with map_exceptions(HTTPCORE_EXC_MAP, request=self._request):
10361045
async for part in self._raw_stream:
1046+
self._num_bytes_downloaded += len(part)
10371047
yield part
10381048
await self.aclose()
10391049

tests/models/test_responses.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,20 @@ def test_iter_raw():
227227
assert raw == b"Hello, world!"
228228

229229

230+
def test_iter_raw_increments_updates_counter():
231+
stream = IteratorStream(iterator=streaming_body())
232+
233+
response = httpx.Response(
234+
200,
235+
stream=stream,
236+
)
237+
238+
num_downloaded = response.num_bytes_downloaded
239+
for part in response.iter_raw():
240+
assert len(part) == (response.num_bytes_downloaded - num_downloaded)
241+
num_downloaded = response.num_bytes_downloaded
242+
243+
230244
@pytest.mark.asyncio
231245
async def test_aiter_raw():
232246
stream = AsyncIteratorStream(aiterator=async_streaming_body())
@@ -241,6 +255,21 @@ async def test_aiter_raw():
241255
assert raw == b"Hello, world!"
242256

243257

258+
@pytest.mark.asyncio
259+
async def test_aiter_raw_increments_updates_counter():
260+
stream = AsyncIteratorStream(aiterator=async_streaming_body())
261+
262+
response = httpx.Response(
263+
200,
264+
stream=stream,
265+
)
266+
267+
num_downloaded = response.num_bytes_downloaded
268+
async for part in response.aiter_raw():
269+
assert len(part) == (response.num_bytes_downloaded - num_downloaded)
270+
num_downloaded = response.num_bytes_downloaded
271+
272+
244273
def test_iter_bytes():
245274
response = httpx.Response(
246275
200,

0 commit comments

Comments
 (0)