diff --git a/easybuild/tools/testing.py b/easybuild/tools/testing.py
index 2f0b0c8a6f..32dd24178b 100644
--- a/easybuild/tools/testing.py
+++ b/easybuild/tools/testing.py
@@ -37,6 +37,7 @@
 """
 import copy
 import os
+import re
 import sys
 from datetime import datetime
 from time import gmtime, strftime
@@ -58,6 +59,52 @@
 
 _log = fancylogger.getLogger('testing', fname=False)
 
+# Environment variables whose upper-cased name contains any of these substrings are left out of the
+# test report; this is a plain substring check, so e.g. 'AUTH' also excludes XAUTHORITY.
+DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES = [
+    'KEY',
+    'SECRET',
+    'TOKEN',
+    'PASSWORD',
+    'API',
+    'AUTH',
+    'CREDENTIAL',
+    'PRIVATE',
+    'LICENSE',
+    'LICENCE',
+]
+# Environment variables whose value matches any of these patterns are left out of the test report;
+# patterns are applied with re.match, so they are anchored at the start of the value only.
+DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX = [
+    # From PR comments https://github.com/easybuilders/easybuild-framework/pull/4877
+    r'AKIA[0-9A-Z]{16}',  # AWS access key
+    r'[A-Za-z0-9/+=]{40}',  # AWS secret key
+    r'eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+',  # JWT token
+    r'gh[pousr]_[A-Za-z0-9_]{36,}',  # GitHub token
+    r'xox[baprs]-[A-Za-z0-9-]+',  # Slack token
+
+    # https://github.com/odomojuli/regextokens
+    # This is too aggressive and can end up excluding any alphanumeric string with length multiple of 4
+    # r'^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)?$',  # Base64
+    r'[1-9][0-9]+-[0-9a-zA-Z]{40}',  # Twitter token
+    r'EAACEdEose0cBA[0-9A-Za-z]+',  # Facebook token
+    r'[0-9a-fA-F]{7}\.[0-9a-fA-F]{32}',  # Instagram token
+    r'AIza[0-9A-Za-z-_]{35}',  # Google API key
+    r'4/[0-9A-Za-z-_]+',  # Google OAuth 2.0 Auth code
+    r'ya29\.[0-9A-Za-z-_]+',  # Google OAuth 2.0 access token
+    r'[rs]k_live_[0-9a-z]{32}',  # Picatic/Stripe API key
+    r'sq0atp-[0-9A-Za-z-_]{22}',  # Square Access token
+    r'access_token\$production\$[0-9a-z]{16}\$[0-9a-f]{32}',  # PayPal (Braintree) access token
+    r'55[0-9a-fA-F]{32}',  # Twilio token
+    r'key-[0-9a-zA-Z]{32}',  # Mailgun API key
+    r'[0-9a-f]{32}-us[0-9]{1,2}',  # Mailchimp API key
+    r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',  # Google Cloud Oauth 2.0 token
+    r'[A-Za-z0-9_]{21}--[A-Za-z0-9_]{8}',  # Google Cloud API key
+    r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}',  # Heroku token
+    r'sk-(.*-)?[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}',  # OpenAI API key
+    r'waka_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',  # WakaTime API key
+]
+
 
 def regtest(easyconfig_paths, modtool, build_specs=None):
     """
@@ -265,8 +312,12 @@ def create_test_report(msg, ecs_with_res, init_session_state, pr_nrs=None, gist_
     for key in sorted(environ_dump.keys()):
         if env_filter is not None and env_filter.search(key):
             continue
-        else:
-            environment += ["%s = %s" % (key, environ_dump[key])]
+        if any(x in key.upper() for x in DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES):
+            continue
+        value = environ_dump[key]
+        if any(re.match(rgx, value) for rgx in DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX):
+            continue
+        environment += ["%s = %s" % (key, value)]
 
     test_report.extend(["#### Environment", "```"] + environment + ["```"])
 
diff --git a/test/framework/github.py b/test/framework/github.py
index ea19d4313d..083dd163bd 100644
--- a/test/framework/github.py
+++ b/test/framework/github.py
@@ -1317,26 +1317,83 @@ def test_github_create_test_report(self):
                 'log_file': logfile,
             }),
         ]
+        environ = {
+            'USER': 'test',
+        }
+        JWT_HDR = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
+        JWT_PLD = 'eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNzA4MzQ1MTIzLCJleHAiOjE3MDgzNTUxMjN9'
+        JWT_SIG = 'SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c'
+        secret_environ = {
+            # Test default removal based on variable value
+            'TOTALLYPUBLICVAR1': 'AKIAIOSFODNN7EXAMPLE',  # AWS_ACCESS_KEY
+            'TOTALLYPUBLICVAR2': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY',  # AWS_SECRET_KEY
+            'TOTALLYPUBLICVAR3': '.'.join([JWT_HDR, JWT_PLD, JWT_SIG]),  # JWT
+            'TOTALLYPUBLICVAR4': 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz',  # GH_TOKEN
+            'TOTALLYPUBLICVAR5': 'xoxb-1234567890-1234567890123-ABCDEFabcdef',  # SLACK_TOKEN
+
+            # Test default removal based on variable name
+            'API_SOMETHING': '1234567890',
+            'MY_PASSWORD': '1234567890',
+            'ABC_TOKEN': '1234567890',
+            'AUTH_XXX': '1234567890',
+            'LICENSE': '1234567890',
+            'WORLD_KEY': '1234567890',
+            'PRIVATE_INFO': '1234567890',
+            'SECRET_SECRET': '1234567890',
+            'INFO_CREDENTIALS': '1234567890',
+        }
         init_session_state = {
             'easybuild_configuration': ['EASYBUILD_DEBUG=1'],
-            'environment': {'USER': 'test'},
+            'environment': {**environ, **secret_environ},
             'module_list': [{'mod_name': 'test'}],
             'system_info': {'name': 'test'},
             'time': gmtime(0),
         }
+
         res = create_test_report("just a test", ecs_with_res, init_session_state)
         patterns = [
             "**SUCCESS** _test.eb_",
             "**FAIL (build issue)** _fail.eb_",
             "01 Jan 1970 00:00:00",
             "EASYBUILD_DEBUG=1",
+            "USER = test",
         ]
         for pattern in patterns:
             self.assertIn(pattern, res['full'])
 
-        for pattern in patterns[:2]:
+        # Test that known token regexes for ENV vars are excluded by default
+        exclude_patterns = [
+            'TOTALLYPUBLICVAR1',
+            'TOTALLYPUBLICVAR2',
+            'TOTALLYPUBLICVAR3',
+            'TOTALLYPUBLICVAR4',
+            'TOTALLYPUBLICVAR5',
+
+            'API_SOMETHING',
+            'MY_PASSWORD',
+            'ABC_TOKEN',
+            'AUTH_XXX',
+            'LICENSE',
+            'WORLD_KEY',
+            'PRIVATE_INFO',
+            'SECRET_SECRET',
+            'INFO_CREDENTIALS',
+        ]
+        for pattern in exclude_patterns:
+            # names are dumped verbatim (upper case), so check the exact name; a lower-cased check can never fail
+            self.assertNotIn(pattern, res['full'])
+
+        res = create_test_report("just a test", ecs_with_res, init_session_state)
+        for pattern in patterns:
             self.assertIn(pattern, res['full'])
 
+        for pattern in patterns[:2]:
+            self.assertIn(pattern, res['overview'])
+
+        for pattern in exclude_patterns:
+            # names are dumped verbatim (upper case), so check the exact name; a lower-cased check can never fail
+            self.assertNotIn(pattern, res['full'])
+
         # mock create_gist function, we don't want to actually create a gist every time we run this test...
         def fake_create_gist(*args, **kwargs):
             return 'https://gist.github.com/%s/test' % GITHUB_TEST_ACCOUNT
@@ -1353,7 +1410,7 @@ def fake_create_gist(*args, **kwargs):
             self.assertIn(pattern, res['full'])
 
         for pattern in patterns[:3]:
-            self.assertIn(pattern, res['full'])
+            self.assertIn(pattern, res['overview'])
 
         self.assertIn("**SUCCESS** _test.eb_", res['overview'])
 
diff --git a/test/framework/options.py b/test/framework/options.py
index d5823157c5..4e3f14d6bf 100644
--- a/test/framework/options.py
+++ b/test/framework/options.py
@@ -3390,26 +3390,46 @@ def toy(extra_args=None):
             return test_report_txt
 
         # define environment variables that should (not) show up in the test report
-        test_var_secret = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
-        os.environ[test_var_secret] = 'thisshouldremainsecretonrequest'
-        test_var_secret_regex = re.compile(test_var_secret)
+        # The name contains an auto-excluded pattern `SECRET`
+        test_var_secret_always = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
+        os.environ[test_var_secret_always] = 'thisshouldremainsecretonrequest'
+        test_var_secret_always_regex = re.compile(test_var_secret_always)
+        # The name looks harmless, but the value is auto-excluded because it is recognized as a GitHub token
+        test_var_secret_always2 = 'THIS_IS_JUST_A_TOTALLY_PUBLIC_ENV_VAR_FOR_EASYBUILD'
+        os.environ[test_var_secret_always2] = 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz'
+        test_var_secret_always_regex2 = re.compile(test_var_secret_always2)
+        # This should be in general present and excluded on demand
+        test_var_secret_ondemand = 'THIS_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'
+        os.environ[test_var_secret_ondemand] = 'thisshouldbehiddenondemand'
+        test_var_secret_ondemand_regex = re.compile(test_var_secret_ondemand)
         test_var_public = 'THIS_IS_JUST_A_PUBLIC_ENV_VAR_FOR_EASYBUILD'
         os.environ[test_var_public] = 'thisshouldalwaysbeincluded'
         test_var_public_regex = re.compile(test_var_public)
 
         # default: no filtering
         test_report_txt = toy()
-        self.assertTrue(test_var_secret_regex.search(test_report_txt))
+        self.assertTrue(test_var_secret_ondemand_regex.search(test_report_txt))
         self.assertTrue(test_var_public_regex.search(test_report_txt))
+        for rgx in [
+            test_var_secret_always_regex,
+            test_var_secret_always_regex2,
+        ]:
+            res = rgx.search(test_report_txt)
+            self.assertFalse(res, "Unexpected match for %s in %s" % (rgx.pattern, test_report_txt))
 
         # filter out env vars that match specified regex pattern
-        filter_arg = "--test-report-env-filter=.*_SECRET_ENV_VAR_FOR_EASYBUILD"
+        filter_arg = "--test-report-env-filter=.*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD"
         test_report_txt = toy(extra_args=[filter_arg])
-        res = test_var_secret_regex.search(test_report_txt)
-        self.assertFalse(res, "No match for %s in %s" % (test_var_secret_regex.pattern, test_report_txt))
+        for rgx in [
+            test_var_secret_ondemand_regex,
+            test_var_secret_always_regex,
+            test_var_secret_always_regex2,
+        ]:
+            res = rgx.search(test_report_txt)
+            self.assertFalse(res, "Unexpected match for %s in %s" % (rgx.pattern, test_report_txt))
         self.assertTrue(test_var_public_regex.search(test_report_txt))
 
         # make sure that used filter is reported correctly in test report
-        filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_SECRET_ENV_VAR_FOR_EASYBUILD'")
+        filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'")
         tup = (filter_arg_regex.pattern, test_report_txt)
         self.assertTrue(filter_arg_regex.search(test_report_txt), "%s in %s" % tup)