Skip to content

Commit 250d36a

Browse files
authored
Merge pull request #4870 from boegel/from_pr_retries
implement exponential backoff in `download_file` + allow max. 6 attempts to download PR diff in `fetch_files_from_pr` and `fetch_files_from_commit`
2 parents 4fc1270 + 660ee44 commit 250d36a

File tree

4 files changed

+63
-11
lines changed

4 files changed

+63
-11
lines changed

easybuild/tools/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@
9696
DEFAULT_CONT_TYPE = CONT_TYPE_SINGULARITY
9797

9898
DEFAULT_BRANCH = 'develop'
99+
DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME = 10
100+
DEFAULT_DOWNLOAD_MAX_ATTEMPTS = 6
99101
DEFAULT_DOWNLOAD_TIMEOUT = 10
100102
DEFAULT_ENV_FOR_SHEBANG = '/usr/bin/env'
101103
DEFAULT_ENVVAR_USERS_MODULES = 'HOME'

easybuild/tools/filetools.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
# import build_log must stay, to use of EasyBuildLog
7070
from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, CWD_NOTFOUND_ERROR
7171
from easybuild.tools.build_log import dry_run_msg, print_msg, print_warning
72+
from easybuild.tools.config import DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME, DEFAULT_DOWNLOAD_MAX_ATTEMPTS
7273
from easybuild.tools.config import ERROR, GENERIC_EASYBLOCK_PKG, IGNORE, WARN, build_option, install_path
7374
from easybuild.tools.output import PROGRESS_BAR_DOWNLOAD_ONE, start_progress_bar, stop_progress_bar, update_progress_bar
7475
from easybuild.tools.hooks import load_source
@@ -777,8 +778,23 @@ def det_file_size(http_header):
777778
return res
778779

779780

780-
def download_file(filename, url, path, forced=False, trace=True):
781-
"""Download a file from the given URL, to the specified path."""
781+
def download_file(filename, url, path, forced=False, trace=True, max_attempts=None, initial_wait_time=None):
782+
"""
783+
Download a file from the given URL, to the specified path.
784+
785+
:param filename: name of file to download
786+
:param url: URL of file to download
787+
:param path: path to download file to
788+
:param forced: boolean to indicate whether force should be used to write the file
789+
:param trace: boolean to indicate whether trace output should be printed
790+
:param max_attempts: max. number of attempts to download file from specified URL
791+
:param initial_wait_time: wait time (in seconds) after first attempt (doubled at each attempt)
792+
"""
793+
794+
if max_attempts is None:
795+
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
796+
if initial_wait_time is None:
797+
initial_wait_time = DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME
782798

783799
insecure = build_option('insecure_download')
784800

@@ -802,7 +818,6 @@ def download_file(filename, url, path, forced=False, trace=True):
802818

803819
# try downloading, three times max.
804820
downloaded = False
805-
max_attempts = 3
806821
attempt_cnt = 0
807822

808823
# use custom HTTP header
@@ -823,6 +838,8 @@ def download_file(filename, url, path, forced=False, trace=True):
823838
used_urllib = std_urllib
824839
switch_to_requests = False
825840

841+
wait_time = initial_wait_time
842+
826843
while not downloaded and attempt_cnt < max_attempts:
827844
attempt_cnt += 1
828845
try:
@@ -861,6 +878,8 @@ def download_file(filename, url, path, forced=False, trace=True):
861878
status_code = err.code
862879
if status_code == 403 and attempt_cnt == 1:
863880
switch_to_requests = True
881+
elif status_code == 429: # too many requests
882+
_log.warning(f"Downloading of {url} failed with HTTP status code 429 (Too many requests)")
864883
elif 400 <= status_code <= 499:
865884
_log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, status_code))
866885
break
@@ -887,6 +906,11 @@ def download_file(filename, url, path, forced=False, trace=True):
887906
_log.info("Downloading using requests package instead of urllib2")
888907
used_urllib = requests
889908

909+
# exponential backoff
910+
wait_time *= 2
911+
_log.info(f"Waiting for {wait_time} seconds before trying download of {url} again...")
912+
time.sleep(wait_time)
913+
890914
if downloaded:
891915
_log.info("Successful download of file %s from url %s to path %s" % (filename, url, path))
892916
if trace:

easybuild/tools/github.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
from easybuild.framework.easyconfig.parser import EasyConfigParser
5555
from easybuild.tools import LooseVersion
5656
from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, print_msg, print_warning
57-
from easybuild.tools.config import build_option
57+
from easybuild.tools.config import DEFAULT_DOWNLOAD_MAX_ATTEMPTS, build_option
5858
from easybuild.tools.filetools import apply_patch, copy_dir, copy_easyblocks, copy_file, copy_framework_files
5959
from easybuild.tools.filetools import det_patched_files, download_file, extract_file
6060
from easybuild.tools.filetools import get_easyblock_class_name, mkdir, read_file, symlink, which, write_file
@@ -545,9 +545,16 @@ def fetch_files_from_pr(pr, path=None, github_user=None, github_account=None, gi
545545
github_user=github_user)
546546

547547
# determine list of changed files via diff
548-
diff_fn = os.path.basename(pr_data['diff_url'])
548+
diff_url = pr_data['diff_url']
549+
diff_fn = os.path.basename(diff_url)
549550
diff_filepath = os.path.join(path, diff_fn)
550-
download_file(diff_fn, pr_data['diff_url'], diff_filepath, forced=True, trace=False)
551+
# max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download
552+
# see also https://github.com/easybuilders/easybuild-framework/issues/4869
553+
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
554+
download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False,
555+
max_attempts=max_attempts)
556+
if not os.path.exists(diff_filepath):
557+
raise EasyBuildError(f"Failed to download {diff_url}, even after {max_attempts} attempts and being patient...")
551558
diff_txt = read_file(diff_filepath)
552559
_log.debug("Diff for PR #%s:\n%s", pr, diff_txt)
553560

@@ -699,17 +706,21 @@ def fetch_files_from_commit(commit, files=None, path=None, github_account=None,
699706
diff_url = os.path.join(GITHUB_URL, github_account, github_repo, 'commit', commit + '.diff')
700707
diff_fn = os.path.basename(diff_url)
701708
diff_filepath = os.path.join(path, diff_fn)
702-
if download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False):
709+
# max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download
710+
# see also https://github.com/easybuilders/easybuild-framework/issues/4869
711+
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
712+
download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False,
713+
max_attempts=max_attempts)
714+
if os.path.exists(diff_filepath):
703715
diff_txt = read_file(diff_filepath)
704716
_log.debug("Diff for commit %s:\n%s", commit, diff_txt)
705717

706718
files = det_patched_files(txt=diff_txt, omit_ab_prefix=True, github=True, filter_deleted=True)
707719
_log.debug("List of patched files for commit %s: %s", commit, files)
708720
else:
709-
raise EasyBuildError(
710-
"Failed to download diff for commit %s of %s/%s", commit, github_account, github_repo,
711-
exit_code=EasyBuildExit.FAIL_GITHUB
712-
)
721+
msg = f"Failed to download diff for commit {commit} of {github_account}/{github_repo} "
722+
msg += " (after {max_attempts} attempts)"
723+
raise EasyBuildError(msg, exit_code=EasyBuildExit.FAIL_GITHUB)
713724

714725
# download tarball for specific commit
715726
repo_commit = download_repo(repo=github_repo, commit=commit, account=github_account)

test/framework/filetools.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,6 +557,21 @@ def test_download_file(self):
557557
downloads = glob.glob(target_location + '*')
558558
self.assertEqual(len(downloads), 1)
559559

560+
ft.remove_file(target_location)
561+
562+
# with max attempts set to 0, nothing gets downloaded
563+
with self.mocked_stdout_stderr():
564+
res = ft.download_file(fn, source_url, target_location, max_attempts=0)
565+
self.assertEqual(res, None)
566+
downloads = glob.glob(target_location + '*')
567+
self.assertEqual(len(downloads), 0)
568+
569+
with self.mocked_stdout_stderr():
570+
res = ft.download_file(fn, source_url, target_location, max_attempts=3, initial_wait_time=5)
571+
self.assertEqual(res, target_location, "'download' of local file works")
572+
downloads = glob.glob(target_location + '*')
573+
self.assertEqual(len(downloads), 1)
574+
560575
# non-existing files result in None return value
561576
with self.mocked_stdout_stderr():
562577
self.assertEqual(ft.download_file(fn, 'file://%s/nosuchfile' % test_dir, target_location), None)

0 commit comments

Comments
 (0)