Skip to content

implement exponential backoff in download_file + allow max. 6 attempts to download PR diff in fetch_files_from_pr and fetch_files_from_commit #4870

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
May 14, 2025
2 changes: 2 additions & 0 deletions easybuild/tools/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@
DEFAULT_CONT_TYPE = CONT_TYPE_SINGULARITY

DEFAULT_BRANCH = 'develop'
DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME = 10
DEFAULT_DOWNLOAD_MAX_ATTEMPTS = 6
DEFAULT_DOWNLOAD_TIMEOUT = 10
DEFAULT_ENV_FOR_SHEBANG = '/usr/bin/env'
DEFAULT_ENVVAR_USERS_MODULES = 'HOME'
Expand Down
30 changes: 27 additions & 3 deletions easybuild/tools/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
# import build_log must stay, to use of EasyBuildLog
from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, CWD_NOTFOUND_ERROR
from easybuild.tools.build_log import dry_run_msg, print_msg, print_warning
from easybuild.tools.config import DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME, DEFAULT_DOWNLOAD_MAX_ATTEMPTS
from easybuild.tools.config import ERROR, GENERIC_EASYBLOCK_PKG, IGNORE, WARN, build_option, install_path
from easybuild.tools.output import PROGRESS_BAR_DOWNLOAD_ONE, start_progress_bar, stop_progress_bar, update_progress_bar
from easybuild.tools.hooks import load_source
Expand Down Expand Up @@ -777,8 +778,23 @@ def det_file_size(http_header):
return res


def download_file(filename, url, path, forced=False, trace=True):
"""Download a file from the given URL, to the specified path."""
def download_file(filename, url, path, forced=False, trace=True, max_attempts=None, initial_wait_time=None):
"""
Download a file from the given URL, to the specified path.

:param filename: name of file to download
:param url: URL of file to download
:param path: path to download file to
:param forced: boolean to indicate whether force should be used to write the file
:param trace: boolean to indicate whether trace output should be printed
:param max_attempts: max. number of attempts to download file from specified URL
:param initial_wait_time: wait time (in seconds) after first attempt (doubled at each attempt)
"""

if max_attempts is None:
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
if initial_wait_time is None:
initial_wait_time = DEFAULT_DOWNLOAD_INITIAL_WAIT_TIME

insecure = build_option('insecure_download')

Expand All @@ -802,7 +818,6 @@ def download_file(filename, url, path, forced=False, trace=True):

# try downloading, three times max.
downloaded = False
max_attempts = 3
attempt_cnt = 0

# use custom HTTP header
Expand All @@ -823,6 +838,8 @@ def download_file(filename, url, path, forced=False, trace=True):
used_urllib = std_urllib
switch_to_requests = False

wait_time = initial_wait_time

while not downloaded and attempt_cnt < max_attempts:
attempt_cnt += 1
try:
Expand Down Expand Up @@ -861,6 +878,8 @@ def download_file(filename, url, path, forced=False, trace=True):
status_code = err.code
if status_code == 403 and attempt_cnt == 1:
switch_to_requests = True
elif status_code == 429: # too many requests
_log.warning(f"Downloading of {url} failed with HTTP status code 429 (Too many requests)")
elif 400 <= status_code <= 499:
_log.warning("URL %s was not found (HTTP response code %s), not trying again" % (url, status_code))
break
Expand All @@ -887,6 +906,11 @@ def download_file(filename, url, path, forced=False, trace=True):
_log.info("Downloading using requests package instead of urllib2")
used_urllib = requests

# exponential backoff
wait_time *= 2
_log.info(f"Waiting for {wait_time} seconds before trying download of {url} again...")
time.sleep(wait_time)

if downloaded:
_log.info("Successful download of file %s from url %s to path %s" % (filename, url, path))
if trace:
Expand Down
27 changes: 19 additions & 8 deletions easybuild/tools/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from easybuild.framework.easyconfig.parser import EasyConfigParser
from easybuild.tools import LooseVersion
from easybuild.tools.build_log import EasyBuildError, EasyBuildExit, print_msg, print_warning
from easybuild.tools.config import build_option
from easybuild.tools.config import DEFAULT_DOWNLOAD_MAX_ATTEMPTS, build_option
from easybuild.tools.filetools import apply_patch, copy_dir, copy_easyblocks, copy_file, copy_framework_files
from easybuild.tools.filetools import det_patched_files, download_file, extract_file
from easybuild.tools.filetools import get_easyblock_class_name, mkdir, read_file, symlink, which, write_file
Expand Down Expand Up @@ -546,9 +546,16 @@ def fetch_files_from_pr(pr, path=None, github_user=None, github_account=None, gi
github_user=github_user)

# determine list of changed files via diff
diff_fn = os.path.basename(pr_data['diff_url'])
diff_url = pr_data['diff_url']
diff_fn = os.path.basename(diff_url)
diff_filepath = os.path.join(path, diff_fn)
download_file(diff_fn, pr_data['diff_url'], diff_filepath, forced=True, trace=False)
# max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download
# see also https://github.com/easybuilders/easybuild-framework/issues/4869
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False,
max_attempts=max_attempts)
if not os.path.exists(diff_filepath):
raise EasyBuildError(f"Failed to download {diff_url}, even after {max_attempts} attempts and being patient...")
diff_txt = read_file(diff_filepath)
_log.debug("Diff for PR #%s:\n%s", pr, diff_txt)

Expand Down Expand Up @@ -700,17 +707,21 @@ def fetch_files_from_commit(commit, files=None, path=None, github_account=None,
diff_url = os.path.join(GITHUB_URL, github_account, github_repo, 'commit', commit + '.diff')
diff_fn = os.path.basename(diff_url)
diff_filepath = os.path.join(path, diff_fn)
if download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False):
# max. 6 attempts + initial wait time of 10sec -> max. 10 * (2^6) = 640sec (~10min) before giving up on download
# see also https://github.com/easybuilders/easybuild-framework/issues/4869
max_attempts = DEFAULT_DOWNLOAD_MAX_ATTEMPTS
download_file(diff_fn, diff_url, diff_filepath, forced=True, trace=False,
max_attempts=max_attempts)
if os.path.exists(diff_filepath):
diff_txt = read_file(diff_filepath)
_log.debug("Diff for commit %s:\n%s", commit, diff_txt)

files = det_patched_files(txt=diff_txt, omit_ab_prefix=True, github=True, filter_deleted=True)
_log.debug("List of patched files for commit %s: %s", commit, files)
else:
raise EasyBuildError(
"Failed to download diff for commit %s of %s/%s", commit, github_account, github_repo,
exit_code=EasyBuildExit.FAIL_GITHUB
)
msg = f"Failed to download diff for commit {commit} of {github_account}/{github_repo} "
msg += " (after {max_attempts} attempts)"
raise EasyBuildError(msg, exit_code=EasyBuildExit.FAIL_GITHUB)

# download tarball for specific commit
repo_commit = download_repo(repo=github_repo, commit=commit, account=github_account)
Expand Down
15 changes: 15 additions & 0 deletions test/framework/filetools.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,21 @@ def test_download_file(self):
downloads = glob.glob(target_location + '*')
self.assertEqual(len(downloads), 1)

ft.remove_file(target_location)

# with max attempts set to 0, nothing gets downloaded
with self.mocked_stdout_stderr():
res = ft.download_file(fn, source_url, target_location, max_attempts=0)
self.assertEqual(res, None)
downloads = glob.glob(target_location + '*')
self.assertEqual(len(downloads), 0)

with self.mocked_stdout_stderr():
res = ft.download_file(fn, source_url, target_location, max_attempts=3, initial_wait_time=5)
self.assertEqual(res, target_location, "'download' of local file works")
downloads = glob.glob(target_location + '*')
self.assertEqual(len(downloads), 1)

# non-existing files result in None return value
with self.mocked_stdout_stderr():
self.assertEqual(ft.download_file(fn, 'file://%s/nosuchfile' % test_dir, target_location), None)
Expand Down