Skip to content

Avoid leaking keys by mistake with --upload-test-report #4877

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
May 21, 2025
Merged
69 changes: 67 additions & 2 deletions easybuild/tools/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"""
import copy
import os
import re
import sys
from datetime import datetime
from time import gmtime, strftime
Expand All @@ -58,6 +59,48 @@

_log = fancylogger.getLogger('testing', fname=False)

_exclude_env_from_report = []
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems like the only reason that this is added is so we have something to play with in the tests?

That's a bad pattern, I think it's better pull tricks in the tests (like changing constants in place and than restoring their original value in the tearDown of the tests) rather than introducing global variables that are only there to play with in the test suite.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My idea was for this functions to also be used outside of framework, eg for an easyblock to excluded a specific environment variable from reports that is know to contain a secret

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if there would be a better solution to implement excluding variables on demand.
In case we want more discussion around that, i think the default excludes should be included ASAP, so i could split this PR in 2 if needed

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fact that _exclude_env_from_report is a global variable here is a problem, since that means it'll be shared across multiple easyblocks used in a single EasyBuild session...

DEFAULT_EXCLUDE_FROM_REPORT = [
'KEY',
'SECRET',
'TOKEN',
'PASSWORD',
'API',
'AUTH',
'CREDENTIAL',
'PRIVATE',
'LICENSE',
'LICENCE',
]
DEFAULT_EXCLUDE_FROM_REPORT_RGX = [
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We use regex rather than rgx quite consistently across the EasyBuild codebase, so I prefer also using it here.

Also, the name of the constant should be a bit more descriptive, it's too vague/broad, so something like:

Suggested change
DEFAULT_EXCLUDE_FROM_REPORT_RGX = [
DEFAULT_EXCLUDE_FROM_TEST_REPORT_VALUE_REGEX = [

Similar above for DEFAULT_EXCLUDE_FROM_REPORT, which I would rename to something like DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES

Constants are here to stay, they effectively become part of the EasyBuild framework API, so we better try and make sure they have names that don't leave much room for guessing...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in 9cf65b5

# From PR comments https://github.com/easybuilders/easybuild-framework/pull/4877
r'AKIA[0-9A-Z]{16}', # AWS access key
r'[A-Za-z0-9/+=]{40}', # AWS secret key
r'eyJ[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+\.[A-Za-z0-9-_]+', # JWT token
r'gh[pousr]_[A-Za-z0-9_]{36,}', # GitHub token
r'xox[baprs]-[A-Za-z0-9-]+', # Slack token

# https://github.com/odomojuli/regextokens
r'^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)?$', # Base64
r'[1-9][0-9]+-[0-9a-zA-Z]{40}', # Twitter token
r'EAACEdEose0cBA[0-9A-Za-z]+', # Facebook token
r'[0-9a-fA-F]{7}.[0-9a-fA-F]{32}', # Instagram token
r'AIza[0-9A-Za-z-_]{35}', # Google API key
r'4/[0-9A-Za-z-_]+', # Google OAuth 2.0 Auth code
r'ya29.[0-9A-Za-z-_]+', # Google OAuth 2.0 access token
r'[rs]k_live_[0-9a-z]{32}', # Picatic/Stripe API key
r'sqOatp-[0-9A-Za-z-_]{22}', # Square Access token
r'access_token,production$[0-9a-z]{161[0-9a,]{32}', # PayPal token
r'55[0-9a-fA-F]{32}', # Twilio token
r'key-[0-9a-zA-Z]{32}', # Mailgun API key
r'[0-9a-f]{32}-us[0-9]{1,2}', # Mailchimp API key
r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', # Google Cloud Oauth 2.0 token
r'[A-Za-z0-9_]{21}--[A-Za-z0-9_]{8}', # Google Cloud API key
r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}', # Heroku token
r'sk-(.*-)?[A-Za-z0-9]{20}T3BlbkFJ[A-Za-z0-9]{20}', # OpenAI API key
r'waka_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}', # WakaTime API key
]


def regtest(easyconfig_paths, modtool, build_specs=None):
"""
Expand Down Expand Up @@ -140,6 +183,21 @@ def session_state():
}


def exclude_env_from_report_add(key):
"""
Exclude key from test report if an environment variables contains key.
:param key: environment variable to exclude
"""
_exclude_env_from_report.append(key.upper())


def exclude_env_from_report_clear():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is only used in the tests, it's fine to twiddle with internal things there, so I wouldn't define a custom function for this, and just play with _exclude_env_from_report directly in the tests?

Likewise for exclude_env_from_report_add

"""
Clear list of environment variables to exclude from test report.
"""
_exclude_env_from_report.clear()


def create_test_report(msg, ecs_with_res, init_session_state, pr_nrs=None, gist_log=False, easyblock_pr_nrs=None,
ec_parse_error=None):
"""
Expand Down Expand Up @@ -265,8 +323,15 @@ def create_test_report(msg, ecs_with_res, init_session_state, pr_nrs=None, gist_
for key in sorted(environ_dump.keys()):
if env_filter is not None and env_filter.search(key):
continue
else:
environment += ["%s = %s" % (key, environ_dump[key])]
value = environ_dump[key]
if any(re.match(rgx, value) for rgx in DEFAULT_EXCLUDE_FROM_REPORT_RGX):
continue
environment += ["%s = %s" % (key, value)]

environment = list(filter(
lambda x: not any(y in x.upper() for y in DEFAULT_EXCLUDE_FROM_REPORT + _exclude_env_from_report),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we also tackle that part above?

I.e. only add a key-value pair if the key (env var name) doesn't have a partial match with anything in DEFAULT_EXCLUDE_FROM_TEST_REPORT_ENV_VAR_NAMES?

The combination of filter with a lambda expression seems way too involved for what's going on here, I'd rather see a 2nd simple condition with a continue above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done in 4cf9c83

environment
))

test_report.extend(["#### Environment", "```"] + environment + ["```"])

Expand Down
79 changes: 76 additions & 3 deletions test/framework/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
from easybuild.tools.github import det_pr_title, fetch_easyconfigs_from_commit, fetch_files_from_commit
from easybuild.tools.github import is_patch_for, pick_default_branch
from easybuild.tools.testing import create_test_report, post_pr_test_report, session_state
from easybuild.tools.testing import exclude_env_from_report_add, exclude_env_from_report_clear
import easybuild.tools.github as gh

try:
Expand Down Expand Up @@ -1317,26 +1318,98 @@ def test_github_create_test_report(self):
'log_file': logfile,
}),
]
environ = {
'USER': 'test',
'DONT_REMOVE_ME': 'test-123',
}
JWT_HDR = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
JWT_PLD = 'eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNzA4MzQ1MTIzLCJleHAiOjE3MDgzNTUxMjN9'
JWT_SIG = 'SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c'
secret_environ = {
'REMOVEME': 'test-abc',
'ReMoVeMe2': 'TeSt-xyz',

# Test default removal based on variable value
'TOTALLYPUBLICVAR1': 'AKIAIOSFODNN7EXAMPLE', # AWS_ACCESS_KEY
'TOTALLYPUBLICVAR2': 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY', # AWS_SECRET_KEY
'TOTALLYPUBLICVAR3': '.'.join([JWT_HDR, JWT_PLD, JWT_SIG]), # JWT
'TOTALLYPUBLICVAR4': 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz', # GH_TOKEN
'TOTALLYPUBLICVAR5': 'xoxb-1234567890-1234567890123-ABCDEFabcdef', # SLACK_TOKEN

# Test default removal based on variable name
'API_SOMETHING': '1234567890',
'MY_PASSWORD': '1234567890',
'ABC_TOKEN': '1234567890',
'AUTH_XXX': '1234567890',
'LICENSE': '1234567890',
'WORLD_KEY': '1234567890',
'PRIVATE_INFO': '1234567890',
'SECRET_SECRET': '1234567890',
'INFO_CREDENTIALS': '1234567890',
}
init_session_state = {
'easybuild_configuration': ['EASYBUILD_DEBUG=1'],
'environment': {'USER': 'test'},
'environment': {**environ, **secret_environ},
'module_list': [{'mod_name': 'test'}],
'system_info': {'name': 'test'},
'time': gmtime(0),
}

# Test exclude_env_from_report_add/clear
exclude_env_from_report_add('REMOVEME'.lower()) # Also check that the name is uppercased in the check

res = create_test_report("just a test", ecs_with_res, init_session_state)
patterns = [
"**SUCCESS** _test.eb_",
"**FAIL (build issue)** _fail.eb_",
"01 Jan 1970 00:00:00",
"EASYBUILD_DEBUG=1",
"DONT_REMOVE_ME = test-123",
]
for pattern in patterns:
self.assertIn(pattern, res['full'])

for pattern in patterns[:2]:
# Test that excluded patterns works by matching also partial strings
exclude_patterns1 = [
'REMOVEME',
'ReMoVeMe2',
]
# Test that known token regexes for ENV vars are excluded by default
exclude_patterns2 = [
'TOTALLYPUBLICVAR1',
'TOTALLYPUBLICVAR2',
'TOTALLYPUBLICVAR3',
'TOTALLYPUBLICVAR4',
'TOTALLYPUBLICVAR5',

'API_SOMETHING',
'MY_PASSWORD',
'ABC_TOKEN',
'AUTH_XXX',
'LICENSE',
'WORLD_KEY',
'PRIVATE_INFO',
'SECRET_SECRET',
'INFO_CREDENTIALS',
]
for pattern in exclude_patterns1 + exclude_patterns2:
# .lower() test that variable name is not case sensitive for excluding
self.assertNotIn(pattern.lower(), res['full'])

exclude_env_from_report_clear()
patterns += exclude_patterns1

res = create_test_report("just a test", ecs_with_res, init_session_state)
for pattern in patterns:
self.assertIn(pattern, res['full'])

for pattern in patterns[:2]:
self.assertIn(pattern, res['overview'])

for pattern in exclude_patterns2:
# .lower() test that variable name is not case sensitive for excluding
self.assertNotIn(pattern.lower(), res['full'])

# mock create_gist function, we don't want to actually create a gist every time we run this test...
def fake_create_gist(*args, **kwargs):
return 'https://gist.github.com/%s/test' % GITHUB_TEST_ACCOUNT
Expand All @@ -1353,7 +1426,7 @@ def fake_create_gist(*args, **kwargs):
self.assertIn(pattern, res['full'])

for pattern in patterns[:3]:
self.assertIn(pattern, res['full'])
self.assertIn(pattern, res['overview'])

self.assertIn("**SUCCESS** _test.eb_", res['overview'])

Expand Down
36 changes: 28 additions & 8 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -3390,26 +3390,46 @@ def toy(extra_args=None):
return test_report_txt

# define environment variables that should (not) show up in the test report
test_var_secret = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret] = 'thisshouldremainsecretonrequest'
test_var_secret_regex = re.compile(test_var_secret)
# The name contains an auto-excluded pattern `SECRET`
test_var_secret_always = 'THIS_IS_JUST_A_SECRET_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_always] = 'thisshouldremainsecretonrequest'
test_var_secret_always_regex = re.compile(test_var_secret_always)
# The name contains an autoexcluded value as a recognized GH token
test_var_secret_always2 = 'THIS_IS_JUST_A_TOTALLY_PUBLIC_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_always2] = 'ghp_123456789_ABCDEFGHIJKlmnopqrstuvwxyz'
test_var_secret_always_regex2 = re.compile(test_var_secret_always2)
# This should be in general present and excluded on demand
test_var_secret_ondemand = 'THIS_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_secret_ondemand] = 'thisshouldbehiddenondemand'
test_var_secret_ondemand_regex = re.compile(test_var_secret_ondemand)
test_var_public = 'THIS_IS_JUST_A_PUBLIC_ENV_VAR_FOR_EASYBUILD'
os.environ[test_var_public] = 'thisshouldalwaysbeincluded'
test_var_public_regex = re.compile(test_var_public)

# default: no filtering
test_report_txt = toy()
self.assertTrue(test_var_secret_regex.search(test_report_txt))
self.assertTrue(test_var_secret_ondemand_regex.search(test_report_txt))
self.assertTrue(test_var_public_regex.search(test_report_txt))
for rgx in [
test_var_secret_always_regex,
test_var_secret_always_regex2,
]:
res = rgx.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (rgx.pattern, test_report_txt))

# filter out env vars that match specified regex pattern
filter_arg = "--test-report-env-filter=.*_SECRET_ENV_VAR_FOR_EASYBUILD"
filter_arg = "--test-report-env-filter=.*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD"
test_report_txt = toy(extra_args=[filter_arg])
res = test_var_secret_regex.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (test_var_secret_regex.pattern, test_report_txt))
for rgx in [
test_var_secret_ondemand_regex,
test_var_secret_always_regex,
test_var_secret_always_regex2,
]:
res = rgx.search(test_report_txt)
self.assertFalse(res, "No match for %s in %s" % (rgx.pattern, test_report_txt))
self.assertTrue(test_var_public_regex.search(test_report_txt))
# make sure that used filter is reported correctly in test report
filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_SECRET_ENV_VAR_FOR_EASYBUILD'")
filter_arg_regex = re.compile(r"--test-report-env-filter='.\*_IS_A_CUSTOM_ENV_VAR_FOR_EASYBUILD'")
tup = (filter_arg_regex.pattern, test_report_txt)
self.assertTrue(filter_arg_regex.search(test_report_txt), "%s in %s" % tup)

Expand Down