Skip to content

Commit c14b549

Browse files
committed
enhance apply_regex_substitutions to support multi-line matches
It may be necessary to match patterns with more context, e.g. the content of the next or previous line, in order to disambiguate matches that would otherwise be too generic. Add parameter `single_line`: when True (the default), the old behavior of matching each line separately is kept; when False, the patterns are applied to the whole text and may match across line breaks. Add parameter `match_all` to require that all patterns match in each file, not just at least one.
1 parent b37f707 commit c14b549

File tree

2 files changed

+87
-39
lines changed

2 files changed

+87
-39
lines changed

easybuild/tools/filetools.py

Lines changed: 54 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1643,16 +1643,22 @@ def apply_patch(patch_file, dest, fn=None, copy=False, level=None, use_git_am=Fa
16431643
return True
16441644

16451645

1646-
def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_match=None):
1646+
def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb',
1647+
on_missing_match=None, match_all=False, single_line=True):
16471648
"""
16481649
Apply specified list of regex substitutions.
16491650
16501651
:param paths: list of paths to files to patch (or just a single filepath)
1651-
:param regex_subs: list of substitutions to apply, specified as (<regexp pattern>, <replacement string>)
1652+
:param regex_subs: list of substitutions to apply,
1653+
specified as (<regexp pattern or regex instance>, <replacement string>)
16521654
:param backup: create backup of original file with specified suffix (no backup if value evaluates to False)
16531655
:param on_missing_match: Define what to do when no match was found in the file.
16541656
Can be 'error' to raise an error, 'warn' to print a warning or 'ignore' to do nothing
16551657
Defaults to the value of --strict
1658+
:param match_all: Expect to match all patterns in all files
1659+
instead of at least one per file for error/warning reporting
1660+
:param single_line: Replace first match of each pattern for each line in the order of the patterns.
1661+
If False the patterns are applied in order to the full text and may match line breaks.
16561662
"""
16571663
if on_missing_match is None:
16581664
on_missing_match = build_option('strict')
@@ -1664,18 +1670,22 @@ def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_m
16641670
if isinstance(paths, string_type):
16651671
paths = [paths]
16661672

1673+
flags = 0 if single_line else re.M
1674+
compiled_regex_subs = [(re.compile(regex, flags) if isinstance(regex, str) else regex, subtxt)
1675+
for (regex, subtxt) in regex_subs]
1676+
16671677
# only report when in 'dry run' mode
16681678
if build_option('extended_dry_run'):
16691679
paths_str = ', '.join(paths)
16701680
dry_run_msg("applying regex substitutions to file(s): %s" % paths_str, silent=build_option('silent'))
1671-
for regex, subtxt in regex_subs:
1672-
dry_run_msg(" * regex pattern '%s', replacement string '%s'" % (regex, subtxt))
1681+
for regex, subtxt in compiled_regex_subs:
1682+
dry_run_msg(" * regex pattern '%s', replacement string '%s'" % (regex.pattern, subtxt))
16731683

16741684
else:
1675-
_log.info("Applying following regex substitutions to %s: %s", paths, regex_subs)
1676-
1677-
compiled_regex_subs = [(re.compile(regex), subtxt) for (regex, subtxt) in regex_subs]
1685+
_log.info("Applying following regex substitutions to %s: %s",
1686+
paths, [(regex.pattern, subtxt) for regex, subtxt in compiled_regex_subs])
16781687

1688+
replacement_failed_msgs = []
16791689
for path in paths:
16801690
try:
16811691
# make sure that file can be opened in text mode;
@@ -1695,32 +1705,49 @@ def apply_regex_substitutions(paths, regex_subs, backup='.orig.eb', on_missing_m
16951705
if backup:
16961706
copy_file(path, path + backup)
16971707
replacement_msgs = []
1708+
replaced = [False] * len(compiled_regex_subs)
16981709
with open_file(path, 'w') as out_file:
1699-
lines = txt_utf8.split('\n')
1700-
del txt_utf8
1701-
for line_id, line in enumerate(lines):
1702-
for regex, subtxt in compiled_regex_subs:
1703-
match = regex.search(line)
1704-
if match:
1710+
if single_line:
1711+
lines = txt_utf8.split('\n')
1712+
del txt_utf8
1713+
for line_id, line in enumerate(lines):
1714+
for i, (regex, subtxt) in enumerate(compiled_regex_subs):
1715+
match = regex.search(line)
1716+
if match:
1717+
origtxt = match.group(0)
1718+
replacement_msgs.append("Replaced in line %d: '%s' -> '%s'" %
1719+
(line_id + 1, origtxt, subtxt))
1720+
replaced[i] = True
1721+
line = regex.sub(subtxt, line)
1722+
lines[line_id] = line
1723+
out_file.write('\n'.join(lines))
1724+
else:
1725+
for i, (regex, subtxt) in enumerate(compiled_regex_subs):
1726+
def do_replace(match):
17051727
origtxt = match.group(0)
1706-
replacement_msgs.append("Replaced in line %d: '%s' -> '%s'" %
1707-
(line_id + 1, origtxt, subtxt))
1708-
line = regex.sub(subtxt, line)
1709-
lines[line_id] = line
1710-
out_file.write('\n'.join(lines))
1728+
# pylint: disable=cell-var-from-loop
1729+
cur_subtxt = match.expand(subtxt)
1730+
# pylint: disable=cell-var-from-loop
1731+
replacement_msgs.append("Replaced: '%s' -> '%s'" % (origtxt, cur_subtxt))
1732+
return cur_subtxt
1733+
txt_utf8, replaced[i] = regex.subn(do_replace, txt_utf8)
1734+
out_file.write(txt_utf8)
17111735
if replacement_msgs:
17121736
_log.info('Applied the following substitutions to %s:\n%s', path, '\n'.join(replacement_msgs))
1713-
else:
1714-
msg = 'Nothing found to replace in %s' % path
1715-
if on_missing_match == ERROR:
1716-
raise EasyBuildError(msg)
1717-
elif on_missing_match == WARN:
1718-
_log.warning(msg)
1719-
else:
1720-
_log.info(msg)
1721-
1737+
if (match_all and not all(replaced)) or (not match_all and not any(replaced)):
1738+
errors = ["Nothing found to replace '%s'" % regex.pattern
1739+
for cur_replaced, (regex, _) in zip(replaced, compiled_regex_subs) if not cur_replaced]
1740+
replacement_failed_msgs.append(', '.join(errors) + ' in ' + path)
17221741
except (IOError, OSError) as err:
17231742
raise EasyBuildError("Failed to patch %s: %s", path, err)
1743+
if replacement_failed_msgs:
1744+
msg = '\n'.join(replacement_failed_msgs)
1745+
if on_missing_match == ERROR:
1746+
raise EasyBuildError(msg)
1747+
elif on_missing_match == WARN:
1748+
_log.warning(msg)
1749+
else:
1750+
_log.info(msg)
17241751

17251752

17261753
def modify_env(old, new):

test/framework/filetools.py

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1443,16 +1443,24 @@ def test_apply_regex_substitutions(self):
14431443
# Check handling of on_missing_match
14441444
ft.write_file(testfile, testtxt)
14451445
regex_subs_no_match = [('Not there', 'Not used')]
1446-
error_pat = 'Nothing found to replace in %s' % testfile
1446+
error_pat = "Nothing found to replace 'Not there' in %s" % testfile
14471447
# Error
14481448
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, testfile, regex_subs_no_match,
14491449
on_missing_match=run.ERROR)
1450+
# First matches, but 2nd not
1451+
regex_subs_part_match = [regex_subs[0], ('Not there', 'Not used')]
1452+
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, testfile, regex_subs_part_match,
1453+
on_missing_match=run.ERROR)
14501454

14511455
# Warn
14521456
with self.log_to_testlogfile():
14531457
ft.apply_regex_substitutions(testfile, regex_subs_no_match, on_missing_match=run.WARN)
14541458
logtxt = ft.read_file(self.logfile)
14551459
self.assertIn('WARNING ' + error_pat, logtxt)
1460+
with self.log_to_testlogfile():
1461+
ft.apply_regex_substitutions(testfile, regex_subs_part_match, on_missing_match=run.WARN)
1462+
logtxt = ft.read_file(self.logfile)
1463+
self.assertIn('WARNING ' + error_pat, logtxt)
14561464

14571465
# Ignore
14581466
with self.log_to_testlogfile():
@@ -1465,6 +1473,21 @@ def test_apply_regex_substitutions(self):
14651473
path = os.path.join(self.test_prefix, 'nosuchfile.txt')
14661474
self.assertErrorRegex(EasyBuildError, error_pat, ft.apply_regex_substitutions, path, regex_subs)
14671475

1476+
# Replace multi-line strings
1477+
testtxt = "This si wrong\nBut mkae right\nLeave this!"
1478+
expected_testtxt = 'This is wrong.\nBut make right\nLeave this!'
1479+
ft.write_file(testfile, testtxt)
1480+
repl = ('This si( .*)\n(.*)mkae right$', 'This is wrong.\nBut make right')
1481+
ft.apply_regex_substitutions(testfile, [repl], backup=False, on_missing_match=ERROR, single_line=False)
1482+
new_testtxt = ft.read_file(testfile)
1483+
self.assertEqual(new_testtxt, expected_testtxt)
1484+
# Supports capture groups
1485+
ft.write_file(testfile, testtxt)
1486+
repl = ('This si( .*)\n(.*)mkae right$', r'This is\1.\n\2make right')
1487+
ft.apply_regex_substitutions(testfile, [repl], backup=False, on_missing_match=ERROR, single_line=False)
1488+
new_testtxt = ft.read_file(testfile)
1489+
self.assertEqual(new_testtxt, expected_testtxt)
1490+
14681491
# make sure apply_regex_substitutions can patch files that include UTF-8 characters
14691492
testtxt = b"foo \xe2\x80\x93 bar" # This is an UTF-8 "-"
14701493
ft.write_file(testfile, testtxt)
@@ -1485,34 +1508,32 @@ def test_apply_regex_substitutions(self):
14851508

14861509
# also test apply_regex_substitutions with a *list* of paths
14871510
# cfr. https://github.com/easybuilders/easybuild-framework/issues/3493
1511+
# and a compiled regex
14881512
test_dir = os.path.join(self.test_prefix, 'test_dir')
14891513
test_file1 = os.path.join(test_dir, 'one.txt')
14901514
test_file2 = os.path.join(test_dir, 'two.txt')
14911515
ft.write_file(test_file1, "Donald is an elephant")
14921516
ft.write_file(test_file2, "2 + 2 = 5")
14931517
regexs = [
1494-
('Donald', 'Dumbo'),
1518+
(re.compile('donald', re.I), 'Dumbo'), # Only matches if this is used as-is
14951519
('= 5', '= 4'),
14961520
]
14971521
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
14981522

14991523
# also check dry run mode
15001524
init_config(build_options={'extended_dry_run': True})
1501-
self.mock_stderr(True)
1502-
self.mock_stdout(True)
1503-
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
1504-
stderr, stdout = self.get_stderr(), self.get_stdout()
1505-
self.mock_stderr(False)
1506-
self.mock_stdout(False)
1525+
with self.mocked_stdout_stderr():
1526+
ft.apply_regex_substitutions([test_file1, test_file2], regexs)
1527+
stderr, stdout = self.get_stderr(), self.get_stdout()
15071528

15081529
self.assertFalse(stderr)
1509-
regex = re.compile('\n'.join([
1530+
regex = '\n'.join([
15101531
r"applying regex substitutions to file\(s\): .*/test_dir/one.txt, .*/test_dir/two.txt",
1511-
r" \* regex pattern 'Donald', replacement string 'Dumbo'",
1532+
r" \* regex pattern 'donald', replacement string 'Dumbo'",
15121533
r" \* regex pattern '= 5', replacement string '= 4'",
15131534
'',
1514-
]))
1515-
self.assertTrue(regex.search(stdout), "Pattern '%s' should be found in: %s" % (regex.pattern, stdout))
1535+
])
1536+
self.assertRegex(stdout, regex)
15161537

15171538
def test_find_flexlm_license(self):
15181539
"""Test find_flexlm_license function."""

0 commit comments

Comments
 (0)