Skip to content

Avoid leaking keys by mistake with --upload-test-report #4877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 21, 2025
Merged
51 changes: 49 additions & 2 deletions easybuild/tools/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"""
import copy
import os
import re
import sys
from datetime import datetime
from time import gmtime, strftime
Expand All @@ -58,6 +59,48 @@

_log = fancylogger.getLogger('testing', fname=False)

DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES = [
'KEY',
'SECRET',
'TOKEN',
'PASSWORD',
'API',
'AUTH',
'CREDENTIAL',
'PRIVATE',
'LICENSE',
'LICENCE',
]
DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX = [
# From PR comments https://github.com/easybuilders/easybuild-framework/pull/4877
r'AKIA[0-9A-Z]{16}', # AWS access key
r'[A-Za-z0-9/+=]{40}', # AWS secret key
r'eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+', # JWT token
r'gh[pousr]_[A-Za-z0-9_]{36,}', # GitHub token
r'xox[baprs]-[A-Za-z0-9-]+', # Slack token

# https://github.com/odomojuli/regextokens
# This is too aggressive and can end up excluding any alphanumeric string with length multiple of 4
# r'^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)?$', # Base64
r'[1-9][0-9]+-[0-9a-zA-Z]{40}', # Twitter token
r'EAACEdEose0cBA[0-9A-Za-z]+', # Facebook token
r'[0-9a-fA-F]{7}.[0-9a-fA-F]{32}', # Instagram token
r'AIza[0-9A-Za-z-_]{35}', # Google API key
r'4/[0-9A-Za-z-_]+', # Google OAuth 2.0 Auth code
r'ya29.[0-9A-Za-z-_]+', # Google OAuth 2.0 access token
r'[rs]k_live_[0-9a-z]{32}', # Picatic/Stripe API key
r'sqOatp-[0-9A-Za-z-_]{22}', # Square Access token
r'access_token,production$[0-9a-z]{161[0-9a,]{32}', # PayPal token
r'55[0-9a-fA-F]{32}', # Twilio token
r'key-[0-9a-zA-Z]{32}', # Mailgun API key
r'[0-9a-f]{32}-us[0-9]{1,2}', # Mailchimp API key
r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', # Google Cloud Oauth 2.0 token
r'[A-Za-z0-9_]{21}--[A-Za-z0-9_]{8}', # Google Cloud API key
r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', # Heroku token
r'sk-(.*-)?[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}', # OpenAI API key
r'waka_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', # WakaTime API key
]


def regtest(easyconfig_paths, modtool, build_specs=None):
"""
Expand Down Expand Up @@ -265,8 +308,12 @@ def create_test_report(msg, ecs_with_res, init_session_state, pr_nrs=None, gist_
for key in sorted(environ_dump.keys()):
if env_filter is not None and env_filter.search(key):
continue
else:
environment += ["%s = %s" % (key, environ_dump[key])]
if any(x in key.upper() for x in DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES):
continue
value = environ_dump[key]
if any(re.match(rgx, value) for rgx in DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX):
continue
environment += ["%s = %s" % (key, value)]

test_report.extend(["#### Environment", "```"] + environment + ["```"])

Expand Down
63 changes: 60 additions & 3 deletions test/framework/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,26 +1317,83 @@ def test_github_create_test_report(self):
'log_file': logfile,
}),
]
environ = {
'USER': 'test',
}
JWT_HDR = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
JWT_PLD = 'eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNzA4MzQ1MTIzLCJleHAiOjE3MDgzNTUxMjN9'
JWT_SIG = 'SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c'
secret_environ = {
# Test default removal based on variable value
'TOTALLYPUBLICVAR1': 'AKIAIOSFODNN7EXAMPLE', # AWS_ACCESS_KEY
'TOTALLYPUBLICVAR2': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', # AWS_SECRET_KEY
'TOTALLYPUBLICVAR3': '.'.join([JWT_HDR, JWT_PLD, JWT_SIG]), # JWT
'TOTALLYPUBLICVAR4': 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz', # GH_TOKEN
'TOTALLYPUBLICVAR5': 'xoxb-1234567890-1234567890123-ABCDEFabcdef', # SLACK_TOKEN

# Test default removal based on variable name
'API_SOMETHING': '1234567890',
'MY_PASSWORD': '1234567890',
'ABC_TOKEN': '1234567890',
'AUTH_XXX': '1234567890',
'LICENSE': '1234567890',
'WORLD_KEY': '1234567890',
'PRIVATE_INFO': '1234567890',
'SECRET_SECRET': '1234567890',
'INFO_CREDENTIALS': '1234567890',
}
init_session_state = {
'easybuild_configuration': ['EASYBUILD_DEBUG=1'],
'environment': {'USER': 'test'},
'environment': {**environ, **secret_environ},
'module_list': [{'mod_name': 'test'}],
'system_info': {'name': 'test'},
'time': gmtime(0),
}

res = create_test_report("just a test", ecs_with_res, init_session_state)
patterns = [
"**SUCCESS** _test.eb_",
"**FAIL (build issue)** _fail.eb_",
"01 Jan 1970 00:00:00",
"EASYBUILD_DEBUG=1",
"USER = test",
]
for pattern in patterns:
self.assertIn(pattern, res['full'])

for pattern in patterns[:2]:
# Test that known token regexes for ENV vars are excluded by default
exclude_patterns = [
'TOTALLYPUBLICVAR1',
'TOTALLYPUBLICVAR2',
'TOTALLYPUBLICVAR3',
'TOTALLYPUBLICVAR4',
'TOTALLYPUBLICVAR5',

'API_SOMETHING',
'MY_PASSWORD',
'ABC_TOKEN',
'AUTH_XXX',
'LICENSE',
'WORLD_KEY',
'PRIVATE_INFO',
'SECRET_SECRET',
'INFO_CREDENTIALS',
]
for pattern in exclude_patterns:
# .lower() test that variable name is not case sensitive for excluding
self.assertNotIn(pattern.lower(), res['full'])

res = create_test_report("just a test", ecs_with_res, init_session_state)
for pattern in patterns:
self.assertIn(pattern, res['full'])

for pattern in patterns[:2]:
self.assertIn(pattern, res['overview'])

for pattern in exclude_patterns:
# .lower() test that variable name is not case sensitive for excluding
self.assertNotIn(pattern.lower(), res['full'])

# mock create_gist function, we don't want to actually create a gist every time we run this test...
def fake_create_gist(*args, **kwargs):
return 'https://gist.github.com/%s/test' % GITHUB_TEST_ACCOUNT
Expand All @@ -1353,7 +1410,7 @@ def fake_create_gist(*args, **kwargs):
self.assertIn(pattern, res['full'])

for pattern in patterns[:3]:
self.assertIn(pattern, res['full'])
self.assertIn(pattern, res['overview'])

self.assertIn("**SUCCESS** _test.eb_", res['overview'])

Expand Down
36 changes: 28 additions & 8 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,26 +3390,46 @@ def toy(extra_args=None):
return test_report_txt

# define environment variables that should (not) show up in the test report
test_var_secret = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret] = 'thisshouldremainsecretonrequest'
test_var_secret_regex = re.compile(test_var_secret)
# The name contains an auto-excluded pattern `SECRET`
test_var_secret_always = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_always] = 'thisshouldremainsecretonrequest'
test_var_secret_always_regex = re.compile(test_var_secret_always)
# The name contains an autoexcluded value as a recognized GH token
test_var_secret_always2 = 'THIS_IS_JUST_A_TOTALLY_PUBLIC_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_always2] = 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz'
test_var_secret_always_regex2 = re.compile(test_var_secret_always2)
# This should be in general present and excluded on demand
test_var_secret_ondemand = 'THIS_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_ondemand] = 'thisshouldbehiddenondemand'
test_var_secret_ondemand_regex = re.compile(test_var_secret_ondemand)
test_var_public = 'THIS_IS_JUST_A_PUBLIC_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_public] = 'thisshouldalwaysbeincluded'
test_var_public_regex = re.compile(test_var_public)

# default: no filtering
test_report_txt = toy()
self.assertTrue(test_var_secret_regex.search(test_report_txt))
self.assertTrue(test_var_secret_ondemand_regex.search(test_report_txt))
self.assertTrue(test_var_public_regex.search(test_report_txt))
for rgx in [
test_var_secret_always_regex,
test_var_secret_always_regex2,
]:
res = rgx.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (rgx.pattern, test_report_txt))

# filter out env vars that match specified regex pattern
filter_arg = "--test-report-env-filter=.*_SECRET_ENV_VAR_FOR_EASYBUILD"
filter_arg = "--test-report-env-filter=.*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD"
test_report_txt = toy(extra_args=[filter_arg])
res = test_var_secret_regex.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (test_var_secret_regex.pattern, test_report_txt))
for rgx in [
test_var_secret_ondemand_regex,
test_var_secret_always_regex,
test_var_secret_always_regex2,
]:
res = rgx.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (rgx.pattern, test_report_txt))
self.assertTrue(test_var_public_regex.search(test_report_txt))
# make sure that used filter is reported correctly in test report
filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_SECRET_ENV_VAR_FOR_EASYBUILD'")
filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'")
tup = (filter_arg_regex.pattern, test_report_txt)
self.assertTrue(filter_arg_regex.search(test_report_txt), "%s in %s" % tup)

Expand Down