@@ -567,6 +567,56 @@ def derive_alt_pypi_url(url):
567567 return alt_pypi_url
568568
569569
def parse_http_header_fields_urlpat(arg, urlpat=None, urlpat_headers=None, maxdepth=3):
    """
    Recurse into PAT::[PAT::FILE|PAT::HEADER: FIELD|HEADER: FIELD] where FILE may be a
    file containing lines matching the same format, and flatten the result as a dict
    e.g. {'^example.com': ['Authorization: Basic token', 'User-Agent: Special Agent']}

    :param arg: string (or included file contents) to parse
    :param urlpat: current URL pattern to associate bare header fields with (carried through recursion)
    :param urlpat_headers: dict of {urlpat: [header, ...]} collected so far, extended in place
    :param maxdepth: remaining file-inclusion recursion depth (guards against a file including itself)
    :return: dict mapping each URL pattern to its list of HTTP header field strings
    """
    # create a fresh dict per call: a mutable default argument ({}) would leak
    # collected headers between independent calls
    if urlpat_headers is None:
        urlpat_headers = {}

    # stop infinite recursion that might happen if a file.txt refers to itself
    if maxdepth < 0:
        _log.debug("Failed to parse_http_header_fields_urlpat (recursion limit)")
        return urlpat_headers

    if not isinstance(arg, str):
        _log.debug("Failed to parse_http_header_fields_urlpat (argument not a string)")
        return urlpat_headers

    # HTTP header fields are separated by CRLF but splitting on LF is more convenient
    for argline in arg.split('\n'):
        argline = argline.strip()  # remove optional whitespace (e.g. remaining CR)
        if not argline or argline.startswith('#'):
            continue  # permit comment lines: ignore them

        # expand an existing relative path to an absolute one
        candidate_path = os.path.join(os.getcwd(), argline)
        if os.path.isfile(candidate_path):
            argline = candidate_path
        if os.path.isfile(argline):
            # argline is a file path, so read that instead (useful with sensitive data)
            _log.debug('File included in parse_http_header_fields_urlpat: %s' % argline)
            argline = read_file(argline)
            urlpat_headers = parse_http_header_fields_urlpat(argline, urlpat, urlpat_headers, maxdepth - 1)
            continue

        # URL pattern is separated by '::' from a HTTP header field
        if '::' in argline:
            urlpat, argline = argline.split('::', 1)  # get the urlpat
            # the remainder may be another parseable argument, recurse with same depth
            urlpat_headers = parse_http_header_fields_urlpat(argline, urlpat, urlpat_headers, maxdepth)
            continue

        if urlpat is not None:
            # collect this header field under the current URL pattern
            urlpat_headers.setdefault(urlpat, []).append(argline)
        else:
            _log.warning("Non-empty argument to http-header-fields-urlpat ignored (missing URL pattern)")

    # return a dict full of {urlpat: [list, of, headers]}
    return urlpat_headers
618+
619+
570620def download_file (filename , url , path , forced = False ):
571621 """Download a file from the given URL, to the specified path."""
572622
@@ -579,41 +629,14 @@ def download_file(filename, url, path, forced=False):
579629 timeout = 10
580630 _log .debug ("Using timeout of %s seconds for initiating download" % timeout )
581631
582- # apply extra custom HTTP header fields for URLs containing a pattern
632+ # parse option HTTP header fields for URLs containing a pattern
583633 http_header_fields_urlpat = build_option ('http_header_fields_urlpat' )
584- extra_http_header_fields = list ()
585- if isinstance (http_header_fields_urlpat , (list , tuple )):
586- prev_urlpat = None
587- header_urlpat = None
588- for argument in http_header_fields_urlpat :
589- _log .debug ("Got build option http header fields urlpat = %s" % argument )
590- # if argument is actually a file path, read that instead (useful with sensitive data)
591- if os .path .isfile (argument ):
592- argument = read_file (argument )
593- # use '::' as a delimeter between URL pattern and the header field
594- if header_urlpat is not None :
595- # remember previous urlpat
596- prev_urlpat = header_urlpat
597- if '::' in argument :
598- _log .debug ("It contains ::" )
599- [header_urlpat , header_field ] = argument .split ('::' , 1 )
600- elif prev_urlpat is not None :
601- header_urlpat = prev_urlpat # reuse previous urlpat
602- header_field = argument # whole argument only contains header info
603- else :
604- # ignore the argument entirely if the URL pattern isn't (or wasn't) given
605- _log .debug ("no urlpat given, giving up" )
606- continue
607- _log .debug ("urlpat = %s" % header_urlpat )
608- _log .debug ("header = %s" % header_field )
609- _log .debug ("url = %s" % url )
610- if re .search (header_urlpat , url ):
611- _log .debug ("url matched!" )
612- # if header is actually a file path, read that instead (useful with sensitive data)
613- if os .path .isfile (header_field ):
614- header_field = read_file (header_field )
615- _log .debug ("header from file = %s" % header_field )
616- extra_http_header_fields .append (header_field )
634+ # compile a dict full of {urlpat: [header, list]}
635+ urlpat_headers = dict ()
636+ if http_header_fields_urlpat is not None :
637+ # there may be multiple options given, parse them all, while updating urlpat_headers
638+ for arg in http_header_fields_urlpat :
639+ urlpat_headers = parse_http_header_fields_urlpat (arg , None , urlpat_headers )
617640
618641 # make sure directory exists
619642 basedir = os .path .dirname (path )
@@ -627,18 +650,14 @@ def download_file(filename, url, path, forced=False):
627650 # use custom HTTP header
628651 headers = {'User-Agent' : 'EasyBuild' , 'Accept' : '*/*' }
629652
630- # permit additional or override headers via http_headers_fields option
631- # whose string value contains header-fields grammar: (see rfc7230)
632- # header-fields = *( header-field *(CR) LF )
633- # header-field = field-name ":" OWS field-value OWS
634- # OWS = ( optional white space )
635- # Note CR may be omitted for convenience (it is absorbed in OWS and stripped)
636- # Note field-value may not not contain ":"
637- for http_header_fields in extra_http_header_fields :
638- extraheaders = dict (hf .split (':' ) for hf in http_header_fields .split ('\n ' ) if hf .count (':' ) == 1 )
639- for key , val in extraheaders .items ():
640- headers [key ] = val
641- _log .debug ('Setting custom HTTP header field: %s (not logging the value)' % (key ))
653+ # permit additional or override headers via http_headers_fields_urlpat option
654+ # only append/override HTTP header fields that match current url
655+ for urlpatkey , http_header_fields in urlpat_headers .items ():
656+ if re .search (urlpatkey , url ):
657+ extraheaders = dict (hf .split (':' , 1 ) for hf in http_header_fields )
658+ for key , val in extraheaders .items ():
659+ headers [key ] = val
660+ _log .debug ('Custom HTTP header field set: %s (value omitted from log)' % (key ))
642661
643662 # for backward compatibility, and to avoid relying on 3rd party Python library 'requests'
644663 url_req = std_urllib .Request (url , headers = headers )
0 commit comments