Add progress to streaming download (#1268)

cdeler · florimondmanca · tomchristie · web-flow · commit ed16eb3a3def · 2020-09-10T12:16:00.000+03:00
* Added last_raw_chunk_size to the Response object (#1208) * Added example with progress bar (#1208) Co-authored-by: Florimond Manca <florimond.manca@gmail.com> * Apply suggestions from code review Co-authored-by: Florimond Manca <florimond.manca@gmail.com> * PR review Changed last_raw_chunk_size to num_bytes_downloaded; Edited the example according to documentation * Update docs/advanced.md Co-authored-by: Florimond Manca <florimond.manca@gmail.com> * Update docs/advanced.md Co-authored-by: Florimond Manca <florimond.manca@gmail.com> * Update docs/advanced.md * Update docs/advanced.md Co-authored-by: Florimond Manca <florimond.manca@gmail.com> Co-authored-by: Tom Christie <tom@tomchristie.com>
diff --git a/docs/advanced.md b/docs/advanced.md
@@ -221,6 +221,34 @@ with httpx.Client(headers=headers) as client:
     ...
 ```
 
+## Monitoring download progress
+
+If you need to monitor download progress of large responses, you can use response streaming and inspect the `response.num_bytes_downloaded` property.
+
+This interface is required for properly determining download progress, because the total number of bytes returned by `response.content` or `response.iter_bytes()` will not always correspond with the raw content length of the response if HTTP response compression is being used.
+
+For example, showing a progress bar using the [`tqdm`](https://github.com/tqdm/tqdm) library while a response is being downloaded could be done like this…
+
+```python
+import tempfile
+
+import httpx
+from tqdm import tqdm
+
+with tempfile.NamedTemporaryFile() as download_file:
+    url = "https://speed.hetzner.de/100MB.bin"
+    with httpx.stream("GET", url) as response:
+        total = int(response.headers["Content-Length"])
+
+        with tqdm(total=total, unit_scale=True, unit_divisor=1024, unit="B") as progress:
+            num_bytes_downloaded = response.num_bytes_downloaded
+            for chunk in response.iter_bytes():
+                download_file.write(chunk)
+                progress.update(response.num_bytes_downloaded - num_bytes_downloaded)
+                num_bytes_downloaded = response.num_bytes_downloaded
+        print(f"The total download size is {response.num_bytes_downloaded} bytes")
+```
+
 ## .netrc Support
 
 HTTPX supports .netrc file. In `trust_env=True` cases, if auth parameter is
diff --git a/httpx/_models.py b/httpx/_models.py
@@ -697,6 +697,8 @@ def __init__(
             self._raw_stream = ByteStream(body=content or b"")
             self.read()
 
+        self._num_bytes_downloaded = 0
+
     @property
     def elapsed(self) -> datetime.timedelta:
         """
@@ -885,6 +887,10 @@ def links(self) -> typing.Dict[typing.Optional[str], typing.Dict[str, str]]:
                 ldict[key] = link
         return ldict
 
+    @property
+    def num_bytes_downloaded(self) -> int:
+        return self._num_bytes_downloaded
+
     def __repr__(self) -> str:
         return f"<Response [{self.status_code} {self.reason_phrase}]>"
 
@@ -951,8 +957,10 @@ def iter_raw(self) -> typing.Iterator[bytes]:
             raise ResponseClosed()
 
         self.is_stream_consumed = True
+        self._num_bytes_downloaded = 0
         with map_exceptions(HTTPCORE_EXC_MAP, request=self._request):
             for part in self._raw_stream:
+                self._num_bytes_downloaded += len(part)
                 yield part
         self.close()
 
@@ -1032,8 +1040,10 @@ async def aiter_raw(self) -> typing.AsyncIterator[bytes]:
             raise ResponseClosed()
 
         self.is_stream_consumed = True
+        self._num_bytes_downloaded = 0
         with map_exceptions(HTTPCORE_EXC_MAP, request=self._request):
             async for part in self._raw_stream:
+                self._num_bytes_downloaded += len(part)
                 yield part
         await self.aclose()
 
diff --git a/tests/models/test_responses.py b/tests/models/test_responses.py
@@ -227,6 +227,20 @@ def test_iter_raw():
     assert raw == b"Hello, world!"
 
 
+def test_iter_raw_increments_updates_counter():
+    stream = IteratorStream(iterator=streaming_body())
+
+    response = httpx.Response(
+        200,
+        stream=stream,
+    )
+
+    num_downloaded = response.num_bytes_downloaded
+    for part in response.iter_raw():
+        assert len(part) == (response.num_bytes_downloaded - num_downloaded)
+        num_downloaded = response.num_bytes_downloaded
+
+
 @pytest.mark.asyncio
 async def test_aiter_raw():
     stream = AsyncIteratorStream(aiterator=async_streaming_body())
@@ -241,6 +255,21 @@ async def test_aiter_raw():
     assert raw == b"Hello, world!"
 
 
+@pytest.mark.asyncio
+async def test_aiter_raw_increments_updates_counter():
+    stream = AsyncIteratorStream(aiterator=async_streaming_body())
+
+    response = httpx.Response(
+        200,
+        stream=stream,
+    )
+
+    num_downloaded = response.num_bytes_downloaded
+    async for part in response.aiter_raw():
+        assert len(part) == (response.num_bytes_downloaded - num_downloaded)
+        num_downloaded = response.num_bytes_downloaded
+
+
 def test_iter_bytes():
     response = httpx.Response(
         200,