Skip to content

Commit 919ce7d

Browse files
authored
Fix progress bar not always closed in file_download.py (#2308)
* Fix progress bar not always closed in file_download.py * Fix feedback from charles
1 parent c8dc5f5 commit 919ce7d

File tree

1 file changed

+69
-64
lines changed

1 file changed

+69
-64
lines changed

src/huggingface_hub/file_download.py

Lines changed: 69 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import contextlib
12
import copy
23
import errno
34
import fnmatch
@@ -487,9 +488,8 @@ def http_get(
487488
)
488489

489490
# Stream file to buffer
490-
progress = _tqdm_bar
491-
if progress is None:
492-
progress = tqdm(
491+
progress_cm: tqdm = (
492+
tqdm( # type: ignore[assignment]
493493
unit="B",
494494
unit_scale=True,
495495
total=total,
@@ -500,71 +500,76 @@ def http_get(
500500
# see https://github.com/huggingface/huggingface_hub/pull/2000
501501
name="huggingface_hub.http_get",
502502
)
503+
if _tqdm_bar is None
504+
else contextlib.nullcontext(_tqdm_bar)
505+
# ^ `contextlib.nullcontext` mimics a context manager that does nothing
506+
# Makes it easier to use the same code path for both cases but in the latter
507+
# case, the progress bar is not closed when exiting the context manager.
508+
)
503509

504-
if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
505-
supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
506-
if not supports_callback:
507-
warnings.warn(
508-
"You are using an outdated version of `hf_transfer`. "
509-
"Consider upgrading to latest version to enable progress bars "
510-
"using `pip install -U hf_transfer`."
511-
)
510+
with progress_cm as progress:
511+
if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
512+
supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
513+
if not supports_callback:
514+
warnings.warn(
515+
"You are using an outdated version of `hf_transfer`. "
516+
"Consider upgrading to latest version to enable progress bars "
517+
"using `pip install -U hf_transfer`."
518+
)
519+
try:
520+
hf_transfer.download(
521+
url=url,
522+
filename=temp_file.name,
523+
max_files=HF_TRANSFER_CONCURRENCY,
524+
chunk_size=DOWNLOAD_CHUNK_SIZE,
525+
headers=headers,
526+
parallel_failures=3,
527+
max_retries=5,
528+
**({"callback": progress.update} if supports_callback else {}),
529+
)
530+
except Exception as e:
531+
raise RuntimeError(
532+
"An error occurred while downloading using `hf_transfer`. Consider"
533+
" disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
534+
) from e
535+
if not supports_callback:
536+
progress.update(total)
537+
if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
538+
raise EnvironmentError(
539+
consistency_error_message.format(
540+
actual_size=os.path.getsize(temp_file.name),
541+
)
542+
)
543+
return
544+
new_resume_size = resume_size
512545
try:
513-
hf_transfer.download(
546+
for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
547+
if chunk: # filter out keep-alive new chunks
548+
progress.update(len(chunk))
549+
temp_file.write(chunk)
550+
new_resume_size += len(chunk)
551+
# Some data has been downloaded from the server so we reset the number of retries.
552+
_nb_retries = 5
553+
except (requests.ConnectionError, requests.ReadTimeout) as e:
554+
# If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
555+
# a transient error (network outage?). We log a warning message and try to resume the download a few times
556+
# before giving up. The retry mechanism is basic but should be enough in most cases.
557+
if _nb_retries <= 0:
558+
logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
559+
raise
560+
logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
561+
time.sleep(1)
562+
reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
563+
return http_get(
514564
url=url,
515-
filename=temp_file.name,
516-
max_files=HF_TRANSFER_CONCURRENCY,
517-
chunk_size=DOWNLOAD_CHUNK_SIZE,
518-
headers=headers,
519-
parallel_failures=3,
520-
max_retries=5,
521-
**({"callback": progress.update} if supports_callback else {}),
522-
)
523-
except Exception as e:
524-
raise RuntimeError(
525-
"An error occurred while downloading using `hf_transfer`. Consider"
526-
" disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
527-
) from e
528-
if not supports_callback:
529-
progress.update(total)
530-
if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
531-
raise EnvironmentError(
532-
consistency_error_message.format(
533-
actual_size=os.path.getsize(temp_file.name),
534-
)
565+
temp_file=temp_file,
566+
proxies=proxies,
567+
resume_size=new_resume_size,
568+
headers=initial_headers,
569+
expected_size=expected_size,
570+
_nb_retries=_nb_retries - 1,
571+
_tqdm_bar=_tqdm_bar,
535572
)
536-
return
537-
new_resume_size = resume_size
538-
try:
539-
for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
540-
if chunk: # filter out keep-alive new chunks
541-
progress.update(len(chunk))
542-
temp_file.write(chunk)
543-
new_resume_size += len(chunk)
544-
# Some data has been downloaded from the server so we reset the number of retries.
545-
_nb_retries = 5
546-
except (requests.ConnectionError, requests.ReadTimeout) as e:
547-
# If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
548-
# a transient error (network outage?). We log a warning message and try to resume the download a few times
549-
# before giving up. The retry mechanism is basic but should be enough in most cases.
550-
if _nb_retries <= 0:
551-
logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
552-
raise
553-
logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
554-
time.sleep(1)
555-
reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
556-
return http_get(
557-
url=url,
558-
temp_file=temp_file,
559-
proxies=proxies,
560-
resume_size=new_resume_size,
561-
headers=initial_headers,
562-
expected_size=expected_size,
563-
_nb_retries=_nb_retries - 1,
564-
_tqdm_bar=_tqdm_bar,
565-
)
566-
567-
progress.close()
568573

569574
if expected_size is not None and expected_size != temp_file.tell():
570575
raise EnvironmentError(

0 commit comments

Comments
 (0)