Skip to content

Commit 317316c

Browse files
committed
split off parse_http_header_fields_urlpat() to process header files recursively
1 parent 7c04568 commit 317316c

File tree

2 files changed

+67
-48
lines changed

2 files changed

+67
-48
lines changed

easybuild/tools/filetools.py

Lines changed: 65 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,56 @@ def derive_alt_pypi_url(url):
567567
return alt_pypi_url
568568

569569

570+
def parse_http_header_fields_urlpat(arg, urlpat=None, urlpat_headers=None, maxdepth=3):
    """
    Recurse into PAT::[PAT::FILE|PAT::HEADER: FIELD|HEADER: FIELD] where FILE may be a
    file containing lines matching the same format, and flatten the result as a dict
    e.g. {'^example.com': ['Authorization: Basic token', 'User-Agent: Special Agent']}

    :param arg: string holding one or more newline-separated entries; each entry is a file path,
                a 'PAT::<entry>' construct, or a plain 'HEADER: FIELD' line
    :param urlpat: URL pattern that header lines without their own 'PAT::' prefix are attached to
    :param urlpat_headers: dict to update in place with the parsed headers; a new dict is created if None
    :param maxdepth: number of nested file inclusions that are still allowed
    :return: dict mapping each URL pattern to the list of header lines collected for it
    """
    # create a fresh dict per call: a mutable default value would be shared between calls,
    # leaking headers parsed in one call into the result of every later call
    if urlpat_headers is None:
        urlpat_headers = {}

    # stop infinite recursion that might happen if a file.txt refers to itself
    if maxdepth < 0:
        _log.debug("Failed to parse_http_header_fields_urlpat (recursion limit)")
        return urlpat_headers

    if not isinstance(arg, str):
        _log.debug("Failed to parse_http_header_fields_urlpat (argument not a string)")
        return urlpat_headers

    # HTTP header fields are separated by CRLF but splitting on LF is more convenient
    for argline in arg.split('\n'):
        argline = argline.strip()  # remove optional whitespace (e.g. remaining CR)
        if not argline or argline.startswith('#'):
            continue  # permit blank lines and comment lines: ignore them

        # resolve against the current working directory; os.path.join leaves absolute paths untouched
        filepath = os.path.join(os.getcwd(), argline)
        if os.path.isfile(filepath):
            # argline is a file path, so parse the contents of that file instead
            _log.debug('File included in parse_http_header_fields_urlpat: %s' % filepath)
            contents = read_file(filepath)
            # following a file costs one level of depth, which bounds self-referencing files
            urlpat_headers = parse_http_header_fields_urlpat(contents, urlpat, urlpat_headers, maxdepth - 1)
            continue

        # URL pattern is separated by '::' from a HTTP header field
        if '::' in argline:
            # the new urlpat is deliberately kept for the following lines of this argument,
            # so that subsequent header lines without a pattern are attached to it
            urlpat, remainder = argline.split('::', 1)
            # the remainder may be another parseable argument, recurse with same depth
            urlpat_headers = parse_http_header_fields_urlpat(remainder, urlpat, urlpat_headers, maxdepth)
            continue

        if urlpat is not None:
            # start a new list of headers for this urlpat if needed, then add the header to it
            urlpat_headers.setdefault(urlpat, []).append(argline)
        else:
            _log.warning("Non-empty argument to http-header-fields-urlpat ignored (missing URL pattern)")

    # return a dict full of {urlpat: [list, of, headers]}
    return urlpat_headers
618+
619+
570620
def download_file(filename, url, path, forced=False):
571621
"""Download a file from the given URL, to the specified path."""
572622

@@ -579,41 +629,14 @@ def download_file(filename, url, path, forced=False):
579629
timeout = 10
580630
_log.debug("Using timeout of %s seconds for initiating download" % timeout)
581631

582-
# apply extra custom HTTP header fields for URLs containing a pattern
632+
# parse option HTTP header fields for URLs containing a pattern
583633
http_header_fields_urlpat = build_option('http_header_fields_urlpat')
584-
extra_http_header_fields = list()
585-
if isinstance(http_header_fields_urlpat, (list, tuple)):
586-
prev_urlpat = None
587-
header_urlpat = None
588-
for argument in http_header_fields_urlpat:
589-
_log.debug("Got build option http header fields urlpat = %s" % argument)
590-
# if argument is actually a file path, read that instead (useful with sensitive data)
591-
if os.path.isfile(argument):
592-
argument = read_file(argument)
593-
# use '::' as a delimeter between URL pattern and the header field
594-
if header_urlpat is not None:
595-
# remember previous urlpat
596-
prev_urlpat = header_urlpat
597-
if '::' in argument:
598-
_log.debug("It contains ::")
599-
[header_urlpat, header_field] = argument.split('::', 1)
600-
elif prev_urlpat is not None:
601-
header_urlpat = prev_urlpat # reuse previous urlpat
602-
header_field = argument # whole argument only contains header info
603-
else:
604-
# ignore the argument entirely if the URL pattern isn't (or wasn't) given
605-
_log.debug("no urlpat given, giving up")
606-
continue
607-
_log.debug("urlpat = %s" % header_urlpat)
608-
_log.debug("header = %s" % header_field)
609-
_log.debug("url = %s" % url)
610-
if re.search(header_urlpat, url):
611-
_log.debug("url matched!")
612-
# if header is actually a file path, read that instead (useful with sensitive data)
613-
if os.path.isfile(header_field):
614-
header_field = read_file(header_field)
615-
_log.debug("header from file = %s" % header_field)
616-
extra_http_header_fields.append(header_field)
634+
# compile a dict full of {urlpat: [header, list]}
635+
urlpat_headers = dict()
636+
if http_header_fields_urlpat is not None:
637+
# there may be multiple options given, parse them all, while updating urlpat_headers
638+
for arg in http_header_fields_urlpat:
639+
urlpat_headers = parse_http_header_fields_urlpat(arg, None, urlpat_headers)
617640

618641
# make sure directory exists
619642
basedir = os.path.dirname(path)
@@ -627,18 +650,14 @@ def download_file(filename, url, path, forced=False):
627650
# use custom HTTP header
628651
headers = {'User-Agent': 'EasyBuild', 'Accept': '*/*'}
629652

630-
# permit additional or override headers via http_headers_fields option
631-
# whose string value contains header-fields grammar: (see rfc7230)
632-
# header-fields = *( header-field *(CR) LF )
633-
# header-field = field-name ":" OWS field-value OWS
634-
# OWS = ( optional white space )
635-
# Note CR may be omitted for convenience (it is absorbed in OWS and stripped)
636-
# Note field-value may not not contain ":"
637-
for http_header_fields in extra_http_header_fields:
638-
extraheaders = dict(hf.split(':') for hf in http_header_fields.split('\n') if hf.count(':') == 1)
639-
for key, val in extraheaders.items():
640-
headers[key] = val
641-
_log.debug('Setting custom HTTP header field: %s (not logging the value)' % (key))
653+
# permit additional or override headers via http_headers_fields_urlpat option
654+
# only append/override HTTP header fields that match current url
655+
for urlpatkey, http_header_fields in urlpat_headers.items():
656+
if re.search(urlpatkey, url):
657+
extraheaders = dict(hf.split(':', 1) for hf in http_header_fields)
658+
for key, val in extraheaders.items():
659+
headers[key] = val
660+
_log.debug('Custom HTTP header field set: %s (value omitted from log)' % (key))
642661

643662
# for backward compatibility, and to avoid relying on 3rd party Python library 'requests'
644663
url_req = std_urllib.Request(url, headers=headers)

easybuild/tools/options.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,8 @@ def override_options(self):
398398
'hide-toolchains': ("Comma separated list of toolchains that you want automatically hidden, "
399399
"(e.g. --hide-toolchains=GCCcore)", 'strlist', 'extend', None),
400400
'http-header-fields-urlpat': (("Set extra HTTP header fields (or file) for URL patterns;"
401-
"(e.g. ^https://www.example.com::/path/to/headers.txt)"),
402-
None, 'append', None, {'metavar':'PAT::FIELD[,[PAT::]FIELDS..]'}),
401+
"(e.g. ^https://www.example.com::/path/to/headers.txt)"),
402+
None, 'append', None, {'metavar': 'PAT::FIELD[,[PAT::]FIELDS..]'}),
403403
'ignore-checksums': ("Ignore failing checksum verification", None, 'store_true', False),
404404
'ignore-osdeps': ("Ignore any listed OS dependencies", None, 'store_true', False),
405405
'install-latest-eb-release': ("Install latest known version of easybuild", None, 'store_true', False),

0 commit comments

Comments
 (0)