Skip to content

Commit e39976b

Browse files
authored
Merge pull request #20 from boegel/build_lock
bug fix, code cleanup + dedicated test for installation lock
2 parents f355c1a + 81f7833 commit e39976b

File tree

4 files changed

+139
-38
lines changed

4 files changed

+139
-38
lines changed

easybuild/framework/easyblock.py

Lines changed: 45 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3039,41 +3039,54 @@ def run_all_steps(self, run_test_cases):
30393039
print_msg("building and installing %s..." % self.full_mod_name, log=self.log, silent=self.silent)
30403040
trace_msg("installation prefix: %s" % self.installdir)
30413041

3042-
lockpath = build_option('lockpath') or os.path.join(install_path('software'), '.locks')
3043-
if not os.path.exists(lockpath):
3044-
mkdir(lockpath, parents=True)
3045-
lockfile_name = os.path.join(lockpath, ".%s.lock" % self.installdir.replace('/', '_'))
3046-
if os.path.exists(lockfile_name):
3047-
if build_option('wait_on_lock'):
3048-
while os.path.exists(lockfile_name):
3049-
print_msg("Lock file %s exists. Waiting 60 seconds." % lockfile_name, silent=self.silent)
3050-
time.sleep(60)
3051-
else:
3052-
print_msg("Build aborted. Lock file %s exists." % lockfile_name, silent=self.silent)
3053-
return False
3042+
ignore_locks = build_option('ignore_locks')
3043+
3044+
if ignore_locks:
3045+
self.log.info("Ignoring locks...")
30543046
else:
3047+
locks_dir = build_option('locks_dir') or os.path.join(install_path('software'), '.locks')
3048+
lock_path = os.path.join(locks_dir, '%s.lock' % self.installdir.replace('/', '_'))
3049+
3050+
# if lock already exists, either abort or wait until it disappears
3051+
if os.path.exists(lock_path):
3052+
wait_on_lock = build_option('wait_on_lock')
3053+
if wait_on_lock:
3054+
while os.path.exists(lock_path):
3055+
print_msg("lock %s exists, waiting %d seconds..." % (lock_path, wait_on_lock),
3056+
silent=self.silent)
3057+
time.sleep(wait_on_lock)
3058+
else:
3059+
raise EasyBuildError("Lock %s already exists, aborting!", lock_path)
3060+
3061+
# create lock to avoid that another installation running in parallel messes things up;
3062+
# we use a directory as a lock, since that's atomically created
30553063
try:
3056-
# create a new lock file
3057-
print_msg("Creating lock file %s" % lockfile_name, silent=self.silent)
3058-
f = open(lockfile_name, "w+")
3059-
f.close()
3060-
3061-
for (step_name, descr, step_methods, skippable) in steps:
3062-
if self._skip_step(step_name, skippable):
3063-
print_msg("%s [skipped]" % descr, log=self.log, silent=self.silent)
3064+
mkdir(lock_path, parents=True)
3065+
except EasyBuildError as err:
3066+
# clean up the error message a bit, get rid of the "Failed to create directory" part + quotes
3067+
stripped_err = str(err).split(':', 1)[1].strip().replace("'", '').replace('"', '')
3068+
raise EasyBuildError("Failed to create lock %s: %s", lock_path, stripped_err)
3069+
3070+
self.log.info("Lock created: %s", lock_path)
3071+
3072+
try:
3073+
for (step_name, descr, step_methods, skippable) in steps:
3074+
if self._skip_step(step_name, skippable):
3075+
print_msg("%s [skipped]" % descr, log=self.log, silent=self.silent)
3076+
else:
3077+
if self.dry_run:
3078+
self.dry_run_msg("%s... [DRY RUN]\n", descr)
30643079
else:
3065-
if self.dry_run:
3066-
self.dry_run_msg("%s... [DRY RUN]\n", descr)
3067-
else:
3068-
print_msg("%s..." % descr, log=self.log, silent=self.silent)
3069-
self.current_step = step_name
3070-
self.run_step(step_name, step_methods)
3071-
3072-
except StopException:
3073-
pass
3074-
finally:
3075-
print_msg("Removing lock file %s" % lockfile_name, silent=self.silent)
3076-
os.remove(lockfile_name)
3080+
print_msg("%s..." % descr, log=self.log, silent=self.silent)
3081+
self.current_step = step_name
3082+
self.run_step(step_name, step_methods)
3083+
3084+
except StopException:
3085+
pass
3086+
finally:
3087+
if not ignore_locks:
3088+
remove_dir(lock_path)
3089+
self.log.info("Lock removed: %s", lock_path)
30773090

30783091
# return True for successful build (or stopped build)
30793092
return True

easybuild/tools/config.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
185185
'job_output_dir',
186186
'job_polling_interval',
187187
'job_target_resource',
188-
'lockpath',
188+
'locks_dir',
189189
'modules_footer',
190190
'modules_header',
191191
'mpi_cmd_template',
@@ -226,6 +226,7 @@ def mk_full_default_path(name, prefix=DEFAULT_PREFIX):
226226
'group_writable_installdir',
227227
'hidden',
228228
'ignore_checksums',
229+
'ignore_locks',
229230
'install_latest_eb_release',
230231
'lib64_fallback_sanity_check',
231232
'logtostdout',

easybuild/tools/options.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,12 @@ def basic_options(self):
255255
'extended-dry-run-ignore-errors': ("Ignore errors that occur during dry run", None, 'store_true', True),
256256
'force': ("Force to rebuild software even if it's already installed (i.e. if it can be found as module), "
257257
"and skipping check for OS dependencies", None, 'store_true', False, 'f'),
258+
'ignore-locks': ("Ignore locks that prevent two identical installations running in parallel",
259+
None, 'store_true', False),
258260
'job': ("Submit the build as a job", None, 'store_true', False),
259261
'logtostdout': ("Redirect main log to stdout", None, 'store_true', False, 'l'),
260-
'lockpath': ("Specifies which path should be used to store lock files", None, 'store_or_None', None),
262+
'locks-dir': ("Directory to store lock files (should be on a shared filesystem); "
263+
"None implies .locks subdirectory of software installation directory", None, 'store_or_None', None),
261264
'missing-modules': ("Print list of missing modules for dependencies of specified easyconfigs",
262265
None, 'store_true', False, 'M'),
263266
'only-blocks': ("Only build listed blocks", 'strlist', 'extend', None, 'b', {'metavar': 'BLOCKS'}),
@@ -435,8 +438,8 @@ def override_options(self):
435438
None, 'store_true', False),
436439
'verify-easyconfig-filenames': ("Verify whether filename of specified easyconfigs matches with contents",
437440
None, 'store_true', False),
438-
'wait-on-lock': ("Wait until lock file is removed when a lock if found",
439-
None, 'store_true', False),
441+
'wait-on-lock': ("Wait interval (in seconds) to use when waiting for existing lock to be removed "
442+
"(0: implies no waiting, but exiting with an error)", int, 'store', 0),
440443
'zip-logs': ("Zip logs that are copied to install directory, using specified command",
441444
None, 'store_or_None', 'gzip'),
442445

test/framework/toy_build.py

Lines changed: 86 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,12 @@
3434
import os
3535
import re
3636
import shutil
37+
import signal
3738
import stat
3839
import sys
3940
import tempfile
4041
from distutils.version import LooseVersion
42+
from functools import wraps
4143
from test.framework.utilities import EnhancedTestCase, TestLoaderFiltered
4244
from test.framework.package import mock_fpm
4345
from unittest import TextTestRunner
@@ -1441,7 +1443,7 @@ def test_module_only(self):
14411443
os.remove(toy_core_mod)
14421444

14431445
# test installing (only) additional module in Lua syntax (if Lmod is available)
1444-
lmod_abspath = which('lmod')
1446+
lmod_abspath = os.environ.get('LMOD_CMD') or which('lmod')
14451447
if lmod_abspath is not None:
14461448
args = common_args[:-1] + [
14471449
'--allow-modules-tool-mismatch',
@@ -2057,7 +2059,7 @@ def test_toy_modaltsoftname(self):
20572059
self.assertTrue(os.path.exists(os.path.join(modules_path, 'yot', yot_name)))
20582060

20592061
# only subdirectories for software should be created
2060-
self.assertEqual(sorted(os.listdir(software_path)), sorted(['.locks', 'toy']))
2062+
self.assertEqual(os.listdir(software_path), ['toy', '.locks'])
20612063
self.assertEqual(sorted(os.listdir(os.path.join(software_path, 'toy'))), ['0.0-one', '0.0-two'])
20622064

20632065
# only subdirectories for modules with alternative names should be created
@@ -2516,6 +2518,88 @@ def test_toy_ghost_installdir(self):
25162518

25172519
self.assertFalse(os.path.exists(toy_installdir))
25182520

2521+
def test_toy_build_lock(self):
2522+
"""Test toy installation when a lock is already in place."""
2523+
2524+
locks_dir = os.path.join(self.test_installpath, 'software', '.locks')
2525+
toy_installdir = os.path.join(self.test_installpath, 'software', 'toy', '0.0')
2526+
toy_lock_fn = toy_installdir.replace(os.path.sep, '_') + '.lock'
2527+
2528+
toy_lock_path = os.path.join(locks_dir, toy_lock_fn)
2529+
mkdir(toy_lock_path, parents=True)
2530+
2531+
error_pattern = "Lock .*_software_toy_0.0.lock already exists, aborting!"
2532+
self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build, raise_error=True, verbose=False)
2533+
2534+
locks_dir = os.path.join(self.test_prefix, 'locks')
2535+
2536+
# no lock in place, so installation proceeds as normal
2537+
extra_args = ['--locks-dir=%s' % locks_dir]
2538+
self.test_toy_build(extra_args=extra_args, verify=True, raise_error=True)
2539+
2540+
# put lock in place in custom locks dir, try again
2541+
toy_lock_path = os.path.join(locks_dir, toy_lock_fn)
2542+
mkdir(toy_lock_path, parents=True)
2543+
self.assertErrorRegex(EasyBuildError, error_pattern, self.test_toy_build,
2544+
extra_args=extra_args, raise_error=True, verbose=False)
2545+
2546+
# also test use of --ignore-locks
2547+
self.test_toy_build(extra_args=extra_args + ['--ignore-locks'], verify=True, raise_error=True)
2548+
2549+
# define a context manager that removes a lock after a while, so we can check the use of --wait-on-lock
2550+
class remove_lock_after:
2551+
def __init__(self, seconds, lock_fp):
2552+
self.seconds = seconds
2553+
self.lock_fp = lock_fp
2554+
2555+
def remove_lock(self, *args):
2556+
remove_dir(self.lock_fp)
2557+
2558+
def __enter__(self):
2559+
signal.signal(signal.SIGALRM, self.remove_lock)
2560+
signal.alarm(self.seconds)
2561+
2562+
def __exit__(self, type, value, traceback):
2563+
pass
2564+
2565+
# wait for lock to be removed, with 1 second interval of checking
2566+
extra_args.append('--wait-on-lock=1')
2567+
2568+
wait_regex = re.compile("^== lock .*_software_toy_0.0.lock exists, waiting 1 seconds", re.M)
2569+
ok_regex = re.compile("^== COMPLETED: Installation ended successfully", re.M)
2570+
2571+
self.assertTrue(os.path.exists(toy_lock_path))
2572+
2573+
# use context manager to remove lock after 3 seconds
2574+
with remove_lock_after(3, toy_lock_path):
2575+
self.mock_stderr(True)
2576+
self.mock_stdout(True)
2577+
self.test_toy_build(extra_args=extra_args, verify=False, raise_error=True, testing=False)
2578+
stderr, stdout = self.get_stderr(), self.get_stdout()
2579+
self.mock_stderr(False)
2580+
self.mock_stdout(False)
2581+
2582+
self.assertEqual(stderr, '')
2583+
2584+
wait_matches = wait_regex.findall(stdout)
2585+
# we can't rely on an exact number of 'waiting' messages, so let's go with a range...
2586+
self.assertTrue(len(wait_matches) in range(2, 5))
2587+
2588+
self.assertTrue(ok_regex.search(stdout), "Pattern '%s' found in: %s" % (ok_regex.pattern, stdout))
2589+
2590+
# when there is no lock in place, --wait-on-lock has no impact
2591+
self.assertFalse(os.path.exists(toy_lock_path))
2592+
self.mock_stderr(True)
2593+
self.mock_stdout(True)
2594+
self.test_toy_build(extra_args=extra_args, verify=False, raise_error=True, testing=False)
2595+
stderr, stdout = self.get_stderr(), self.get_stdout()
2596+
self.mock_stderr(False)
2597+
self.mock_stdout(False)
2598+
2599+
self.assertEqual(stderr, '')
2600+
self.assertTrue(ok_regex.search(stdout), "Pattern '%s' found in: %s" % (ok_regex.pattern, stdout))
2601+
self.assertFalse(wait_regex.search(stdout), "Pattern '%s' not found in: %s" % (wait_regex.pattern, stdout))
2602+
25192603

25202604
def suite():
25212605
""" return all the tests in this file """

0 commit comments

Comments
 (0)