Add lock directories to prevent identical installations from running in parallel.

This patch adds the --ignore-locks, --locks-dir and --wait-on-lock options,
creates a lock directory before the installation steps in run_all_steps
(easybuild/framework/easyblock.py) and removes it afterwards, and extends the
toy build tests to cover the new behaviour.

diff --git a/easybuild/framework/easyblock.py b/easybuild/framework/easyblock.py
index 6b33bc43c2..ad89b383fb 100644
--- a/easybuild/framework/easyblock.py
+++ b/easybuild/framework/easyblock.py
@@ -3038,6 +3038,37 @@ def run_all_steps(self, run_test_cases):
 
         print_msg("building and installing %s..." % self.full_mod_name, log=self.log, silent=self.silent)
         trace_msg("installation prefix: %s" % self.installdir)
+
+        ignore_locks = build_option('ignore_locks')
+
+        if ignore_locks:
+            self.log.info("Ignoring locks...")
+        else:
+            locks_dir = build_option('locks_dir') or os.path.join(install_path('software'), '.locks')
+            lock_path = os.path.join(locks_dir, '%s.lock' % self.installdir.replace('/', '_'))
+
+            # if lock already exists, either abort or wait until it disappears
+            if os.path.exists(lock_path):
+                wait_on_lock = build_option('wait_on_lock')
+                if wait_on_lock:
+                    while os.path.exists(lock_path):
+                        print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_on_lock),
+                                  silent=self.silent)
+                        time.sleep(wait_on_lock)
+                else:
+                    raise EasyBuildError("Lock %s already exists, aborting!", lock_path)
+
+            # create lock to avoid that another installation running in parallel messes things up;
+            # we use a directory as a lock, since that's atomically created
+            try:
+                mkdir(lock_path, parents=True)
+            except EasyBuildError as err:
+                # clean up the error message a bit, get rid of the "Failed to create directory" part + quotes
+                stripped_err = str(err).split(':', 1)[-1].strip().replace("'", '').replace('"', '')
+                raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)
+
+            self.log.info("Lock created: %s", lock_path)
+
         try:
             for (step_name, descr, step_methods, skippable) in steps:
                 if self._skip_step(step_name, skippable):
@@ -3052,6 +3083,10 @@ def run_all_steps(self, run_test_cases):
 
         except StopException:
             pass
+        finally:
+            if not ignore_locks:
+                remove_dir(lock_path)
+                self.log.info("Lock removed: %s", lock_path)
 
         # return True for successfull build (or stopped build)
         return True
diff --git a/easybuild/tools/config.py b/easybuild/tools/config.py
index 1a410d4e06..30253b6f61 100644
--- a/easybuild/tools/config.py
+++ b/easybuild/tools/config.py
@@ -185,6 +185,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
         'job_output_dir',
         'job_polling_interval',
         'job_target_resource',
+        'locks_dir',
         'modules_footer',
         'modules_header',
         'mpi_cmd_template',
@@ -225,6 +226,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
         'group_writable_installdir',
         'hidden',
         'ignore_checksums',
+        'ignore_locks',
         'install_latest_eb_release',
         'lib64_fallback_sanity_check',
         'logtostdout',
@@ -249,6 +251,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
         'use_f90cache',
         'use_existing_modules',
         'set_default_module',
+        'wait_on_lock',
     ],
     True: [
         'cleanup_builddir',
diff --git a/easybuild/tools/options.py b/easybuild/tools/options.py
index 513bf715e6..17ddd9cd59 100644
--- a/easybuild/tools/options.py
+++ b/easybuild/tools/options.py
@@ -255,8 +255,13 @@ def basic_options(self):
             'extended-dry-run-ignore-errors': ("Ignore errors that occur during dry run", None, 'store_true', True),
             'force': ("Force to rebuild software even if it's already installed (i.e. if it can be found as module), "
                       "and skipping check for OS dependencies", None, 'store_true', False, 'f'),
+            'ignore-locks': ("Ignore locks that prevent two identical installations running in parallel",
+                             None, 'store_true', False),
             'job': ("Submit the build as a job", None, 'store_true', False),
             'logtostdout': ("Redirect main log to stdout", None, 'store_true', False, 'l'),
+            'locks-dir': ("Directory to store lock files (should be on a shared filesystem); "
+                          "None implies .locks subdirectory of software installation directory",
+                          None, 'store_or_None', None),
             'missing-modules': ("Print list of missing modules for dependencies of specified easyconfigs",
                                 None, 'store_true', False, 'M'),
             'only-blocks': ("Only build listed blocks", 'strlist', 'extend', None, 'b', {'metavar': 'BLOCKS'}),
@@ -434,6 +439,8 @@ def override_options(self):
                                      None, 'store_true', False),
             'verify-easyconfig-filenames': ("Verify whether filename of specified easyconfigs matches with contents",
                                             None, 'store_true', False),
+            'wait-on-lock': ("Wait interval (in seconds) to use when waiting for existing lock to be removed "
+                             "(0: implies no waiting, but exiting with an error)", int, 'store', 0),
             'zip-logs': ("Zip logs that are copied to install directory, using specified command",
                          None, 'store_or_None', 'gzip'),
 
diff --git a/test/framework/toy_build.py b/test/framework/toy_build.py
index d4a285e7fe..3145981ca7 100644
--- a/test/framework/toy_build.py
+++ b/test/framework/toy_build.py
@@ -34,6 +34,7 @@
 import os
 import re
 import shutil
+import signal
 import stat
 import sys
 import tempfile
@@ -1415,7 +1416,7 @@ def test_module_only(self):
         self.assertTrue(os.path.exists(os.path.join(self.test_installpath, 'software', 'toy', '0.0-deps', 'bin')))
         modtxt = read_file(toy_mod)
         self.assertTrue(re.search("set root %s" % prefix, modtxt))
-        self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 1)
+        self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 2)
         self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software', 'toy'))), 1)
 
         # install (only) additional module under a hierarchical MNS
@@ -1430,7 +1431,7 @@ def test_module_only(self):
         # existing install is reused
         modtxt2 = read_file(toy_core_mod)
         self.assertTrue(re.search("set root %s" % prefix, modtxt2))
-        self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 2)
+        self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 3)
         self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software', 'toy'))), 1)
 
         # make sure load statements for dependencies are included
@@ -1441,7 +1442,7 @@ def test_module_only(self):
         os.remove(toy_core_mod)
 
         # test installing (only) additional module in Lua syntax (if Lmod is available)
-        lmod_abspath = which('lmod')
+        lmod_abspath = os.environ.get('LMOD_CMD') or which('lmod')
         if lmod_abspath is not None:
             args = common_args[:-1] + [
                 '--allow-modules-tool-mismatch',
@@ -1455,7 +1456,7 @@ def test_module_only(self):
             # existing install is reused
             modtxt3 = read_file(toy_mod + '.lua')
             self.assertTrue(re.search('local root = "%s"' % prefix, modtxt3))
-            self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 2)
+            self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software'))), 3)
             self.assertEqual(len(os.listdir(os.path.join(self.test_installpath, 'software', 'toy'))), 1)
 
             # make sure load statements for dependencies are included
@@ -2057,7 +2058,7 @@ def test_toy_modaltsoftname(self):
         self.assertTrue(os.path.exists(os.path.join(modules_path, 'yot', yot_name)))
 
         # only subdirectories for software should be created
-        self.assertEqual(os.listdir(software_path), ['toy'])
+        self.assertEqual(sorted(os.listdir(software_path)), sorted(['toy', '.locks']))
         self.assertEqual(sorted(os.listdir(os.path.join(software_path, 'toy'))), ['0.0-one', '0.0-two'])
 
         # only subdirectories for modules with alternative names should be created
@@ -2516,6 +2517,95 @@ def test_toy_ghost_installdir(self):
 
         self.assertFalse(os.path.exists(toy_installdir))
 
+    def test_toy_build_lock(self):
+        """Test toy installation when a lock is already in place."""
+
+        locks_dir = os.path.join(self.test_installpath, 'software', '.locks')
+        toy_installdir = os.path.join(self.test_installpath, 'software', 'toy', '0.0')
+        toy_lock_fn = toy_installdir.replace(os.path.sep, '_') + '.lock'
+
+        toy_lock_path = os.path.join(locks_dir, toy_lock_fn)
+        mkdir(toy_lock_path, parents=True)
+
+        error_pattern = r"Lock .*_software_toy_0\.0\.lock already exists, aborting!"
+        self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build, raise_error=True, verbose=False)
+
+        locks_dir = os.path.join(self.test_prefix, 'locks')
+
+        # no lock in place, so installation proceeds as normal
+        extra_args = ['--locks-dir=%s' % locks_dir]
+        self.test_toy_build(extra_args=extra_args, verify=True, raise_error=True)
+
+        # put lock in place in custom locks dir, try again
+        toy_lock_path = os.path.join(locks_dir, toy_lock_fn)
+        mkdir(toy_lock_path, parents=True)
+        self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build,
+                              extra_args=extra_args, raise_error=True, verbose=False)
+
+        # also test use of --ignore-locks
+        self.test_toy_build(extra_args=extra_args + ['--ignore-locks'], verify=True, raise_error=True)
+
+        # define a context manager that removes a lock after a while, so we can check the use of --wait-on-lock
+        class remove_lock_after:
+            def __init__(self, seconds, lock_fp):
+                self.seconds = seconds
+                self.lock_fp = lock_fp
+
+            def remove_lock(self, *args):
+                remove_dir(self.lock_fp)
+
+            def __enter__(self):
+                signal.signal(signal.SIGALRM, self.remove_lock)
+                signal.alarm(self.seconds)
+
+            def __exit__(self, type, value, traceback):
+                signal.alarm(0)
+
+        # wait for lock to be removed, with 1 second interval of checking
+        extra_args.append('--wait-on-lock=1')
+
+        wait_regex = re.compile(r"^== lock .*_software_toy_0\.0\.lock exists, waiting 1 seconds", re.M)
+        ok_regex = re.compile("^== COMPLETED: Installation ended successfully", re.M)
+
+        self.assertTrue(os.path.exists(toy_lock_path))
+
+        # use context manager to remove lock after 3 seconds
+        with remove_lock_after(3, toy_lock_path):
+            self.mock_stderr(True)
+            self.mock_stdout(True)
+            self.test_toy_build(extra_args=extra_args, verify=False, raise_error=True, testing=False)
+            stderr, stdout = self.get_stderr(), self.get_stdout()
+            self.mock_stderr(False)
+            self.mock_stdout(False)
+
+            self.assertEqual(stderr, '')
+
+            wait_matches = wait_regex.findall(stdout)
+            # we can't rely on an exact number of 'waiting' messages, so let's go with a range...
+            self.assertTrue(len(wait_matches) in range(2, 5))
+
+            self.assertTrue(ok_regex.search(stdout), "Pattern '%s' not found in: %s" % (ok_regex.pattern, stdout))
+
+        # when there is no lock in place, --wait-on-lock has no impact
+        self.assertFalse(os.path.exists(toy_lock_path))
+        self.mock_stderr(True)
+        self.mock_stdout(True)
+        self.test_toy_build(extra_args=extra_args, verify=False, raise_error=True, testing=False)
+        stderr, stdout = self.get_stderr(), self.get_stdout()
+        self.mock_stderr(False)
+        self.mock_stdout(False)
+
+        self.assertEqual(stderr, '')
+        self.assertTrue(ok_regex.search(stdout), "Pattern '%s' not found in: %s" % (ok_regex.pattern, stdout))
+        self.assertFalse(wait_regex.search(stdout), "Pattern '%s' found in: %s" % (wait_regex.pattern, stdout))
+
+        # check for clean error on creation of lock
+        extra_args = ['--locks-dir=/']
+        error_pattern = r"Failed to create lock /.*_software_toy_0\.0\.lock:.* "
+        error_pattern += r"(Read-only file system|Permission denied)"
+        self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build,
+                              extra_args=extra_args, raise_error=True, verbose=False)
+
 
 def suite():
     """ return all the tests in this file """