diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py
index fa9ae15be9..5c43fa3cc6 100644
--- a/sentry_sdk/utils.py
+++ b/sentry_sdk/utils.py
@@ -1353,8 +1353,8 @@ def from_base64(base64_string):
 Components = namedtuple("Components", ["scheme", "netloc", "path", "query", "fragment"])
 
 
-def sanitize_url(url, remove_authority=True, remove_query_values=True):
-    # type: (str, bool, bool) -> str
+def sanitize_url(url, remove_authority=True, remove_query_values=True, split=False):
+    # type: (str, bool, bool, bool) -> Union[str, Components]
     """
     Removes the authority and query parameter values from a given URL.
     """
@@ -1383,17 +1383,18 @@ def sanitize_url(url, remove_authority=True, remove_query_values=True):
     else:
         query_string = parsed_url.query
 
-    safe_url = urlunsplit(
-        Components(
-            scheme=parsed_url.scheme,
-            netloc=netloc,
-            query=query_string,
-            path=parsed_url.path,
-            fragment=parsed_url.fragment,
-        )
+    components = Components(
+        scheme=parsed_url.scheme,
+        netloc=netloc,
+        query=query_string,
+        path=parsed_url.path,
+        fragment=parsed_url.fragment,
     )
 
-    return safe_url
+    if split:
+        return components
+    else:
+        return urlunsplit(components)
 
 
 ParsedUrl = namedtuple("ParsedUrl", ["url", "query", "fragment"])
@@ -1406,20 +1407,25 @@ def parse_url(url, sanitize=True):
     parameters will be sanitized to remove sensitive data. The autority (username and password)
     in the URL will always be removed.
     """
-    url = sanitize_url(url, remove_authority=True, remove_query_values=sanitize)
+    parsed_url = sanitize_url(
+        url, remove_authority=True, remove_query_values=sanitize, split=True
+    )
 
-    parsed_url = urlsplit(url)
     base_url = urlunsplit(
         Components(
-            scheme=parsed_url.scheme,
-            netloc=parsed_url.netloc,
+            scheme=parsed_url.scheme,  # type: ignore
+            netloc=parsed_url.netloc,  # type: ignore
             query="",
-            path=parsed_url.path,
+            path=parsed_url.path,  # type: ignore
             fragment="",
         )
     )
 
-    return ParsedUrl(url=base_url, query=parsed_url.query, fragment=parsed_url.fragment)
+    return ParsedUrl(
+        url=base_url,
+        query=parsed_url.query,  # type: ignore
+        fragment=parsed_url.fragment,  # type: ignore
+    )
 
 
 def is_valid_sample_rate(rate, source):
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 53e3025b98..4a028d70b3 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -69,6 +69,24 @@ def test_sanitize_url(url, expected_result):
     assert parts == expected_parts
 
 
+def test_sanitize_url_and_split():
+    parts = sanitize_url(
+        "https://username:password@example.com?token=abc&sessionid=123&save=true",
+        split=True,
+    )
+
+    expected_query = sorted(
+        "token=[Filtered]&sessionid=[Filtered]&save=[Filtered]".split("&")
+    )
+    query = sorted(parts.query.split("&"))
+
+    assert parts.scheme == "https"
+    assert parts.netloc == "[Filtered]:[Filtered]@example.com"
+    assert query == expected_query
+    assert parts.path == ""
+    assert parts.fragment == ""
+
+
 @pytest.mark.parametrize(
     ("url", "sanitize", "expected_url", "expected_query", "expected_fragment"),
     [