From 912809ca68454c37c33f9f5e7f780588ac4ecca1 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 7 Jun 2023 16:45:46 +0200 Subject: [PATCH 1/7] fix --- sentry_sdk/utils.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index fa9ae15be9..4a3aeaa77b 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1353,8 +1353,8 @@ def from_base64(base64_string): Components = namedtuple("Components", ["scheme", "netloc", "path", "query", "fragment"]) -def sanitize_url(url, remove_authority=True, remove_query_values=True): - # type: (str, bool, bool) -> str +def sanitize_url(url, remove_authority=True, remove_query_values=True, split=False): + # type: (str, bool, bool, bool) -> str """ Removes the authority and query parameter values from a given URL. """ @@ -1383,17 +1383,18 @@ def sanitize_url(url, remove_authority=True, remove_query_values=True): else: query_string = parsed_url.query - safe_url = urlunsplit( - Components( - scheme=parsed_url.scheme, - netloc=netloc, - query=query_string, - path=parsed_url.path, - fragment=parsed_url.fragment, - ) + components = Components( + scheme=parsed_url.scheme, + netloc=netloc, + query=query_string, + path=parsed_url.path, + fragment=parsed_url.fragment, ) - return safe_url + if split: + return components + else: + return urlunsplit(components) ParsedUrl = namedtuple("ParsedUrl", ["url", "query", "fragment"]) @@ -1406,9 +1407,10 @@ def parse_url(url, sanitize=True): parameters will be sanitized to remove sensitive data. The autority (username and password) in the URL will always be removed. """ - url = sanitize_url(url, remove_authority=True, remove_query_values=sanitize) + parsed_url = sanitize_url( + url, remove_authority=True, remove_query_values=sanitize, split=True + ) - parsed_url = urlsplit(url) base_url = urlunsplit( Components( scheme=parsed_url.scheme, From a873c38b6733598299aad72356964a025ea7ad50 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 7 Jun 2023 17:04:42 +0200 Subject: [PATCH 2/7] type fixes, test --- sentry_sdk/utils.py | 2 +- tests/test_utils.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 4a3aeaa77b..b1db13034b 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1354,7 +1354,7 @@ def from_base64(base64_string): def sanitize_url(url, remove_authority=True, remove_query_values=True, split=False): - # type: (str, bool, bool, bool) -> str + # type: (str, bool, bool, bool) -> Union[str, Components] """ Removes the authority and query parameter values from a given URL. """ diff --git a/tests/test_utils.py b/tests/test_utils.py index 53e3025b98..4961a98053 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,6 +3,7 @@ import sys from sentry_sdk.utils import ( + Components, is_valid_sample_rate, logger, match_regex_list, @@ -69,6 +70,19 @@ def test_sanitize_url(url, expected_result): assert parts == expected_parts +def test_sanitize_url_and_split(): + parts = sanitize_url( + "https://example.com?token=abc&sessionid=123&save=true", split=True + ) + assert parts == Components( + scheme="https", + netloc="example.com", + path="", + query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", + fragment="", + ) + + @pytest.mark.parametrize( ("url", "sanitize", "expected_url", "expected_query", "expected_fragment"), [ From 7d28a9351a90101eef8acb5b842edb823b034128 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 7 Jun 2023 17:10:54 +0200 Subject: [PATCH 3/7] Added username/pwd to url in test --- tests/test_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4961a98053..eedde295a2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -72,11 +72,12 @@ def test_sanitize_url(url, expected_result): def test_sanitize_url_and_split(): parts = sanitize_url( - "https://example.com?token=abc&sessionid=123&save=true", split=True + "https://username:password@example.com?token=abc&sessionid=123&save=true", + split=True, ) assert parts == Components( scheme="https", - netloc="example.com", + netloc="[Filtered]:[Filtered]@example.com", path="", query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", fragment="", From 11fe5248682ae07500109a1e2320ebb08861ce59 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 7 Jun 2023 17:13:00 +0200 Subject: [PATCH 4/7] Fixed typing --- sentry_sdk/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index b1db13034b..a8d3850ede 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1409,7 +1409,7 @@ def parse_url(url, sanitize=True): """ parsed_url = sanitize_url( url, remove_authority=True, remove_query_values=sanitize, split=True - ) + ) # type: Components base_url = urlunsplit( Components( From 5f349013b80a35dce55f818ae16f6dd5eb45335f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 7 Jun 2023 17:18:38 +0200 Subject: [PATCH 5/7] Ignore mypy --- sentry_sdk/utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index a8d3850ede..5c43fa3cc6 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -1409,19 +1409,23 @@ def parse_url(url, sanitize=True): """ parsed_url = sanitize_url( url, remove_authority=True, remove_query_values=sanitize, split=True - ) # type: Components + ) base_url = urlunsplit( Components( - scheme=parsed_url.scheme, - netloc=parsed_url.netloc, + scheme=parsed_url.scheme, # type: ignore + netloc=parsed_url.netloc, # type: ignore query="", - path=parsed_url.path, + path=parsed_url.path, # type: ignore fragment="", ) ) - return ParsedUrl(url=base_url, query=parsed_url.query, fragment=parsed_url.fragment) + return ParsedUrl( + url=base_url, + query=parsed_url.query, # type: ignore + fragment=parsed_url.fragment, # type: ignore + ) def is_valid_sample_rate(rate, source): From 3a39059eec8ee4731f4bee9a2329993dd87569b0 Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 7 Jun 2023 17:27:46 +0200 Subject: [PATCH 6/7] sort query --- tests/test_utils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index eedde295a2..08ede5f4c2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -3,7 +3,6 @@ import sys from sentry_sdk.utils import ( - Components, is_valid_sample_rate, logger, match_regex_list, @@ -75,13 +74,17 @@ def test_sanitize_url_and_split(): "https://username:password@example.com?token=abc&sessionid=123&save=true", split=True, ) - assert parts == Components( - scheme="https", - netloc="[Filtered]:[Filtered]@example.com", - path="", - query="token=[Filtered]&sessionid=[Filtered]&save=[Filtered]", - fragment="", + + expected_query = sorted( + "token=[Filtered]&sessionid=[Filtered]&save=[Filtered]".split("&") ) + query = sorted(parts.split("&")) + + assert parts.scheme == "https" + assert parts.netloc == "[Filtered]:[Filtered]@example.com" + assert query == expected_query + assert parts.path == "" + assert parts.fragment == "" @pytest.mark.parametrize( From 207ec9d64bdb65f65cccb33359fe591f00880b2e Mon Sep 17 00:00:00 2001 From: Ivana Kellyerova Date: Wed, 7 Jun 2023 17:30:51 +0200 Subject: [PATCH 7/7] fix --- tests/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_utils.py b/tests/test_utils.py index 08ede5f4c2..4a028d70b3 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -78,7 +78,7 @@ def test_sanitize_url_and_split(): expected_query = sorted( "token=[Filtered]&sessionid=[Filtered]&save=[Filtered]".split("&") ) - query = sorted(parts.split("&")) + query = sorted(parts.query.split("&")) assert parts.scheme == "https" assert parts.netloc == "[Filtered]:[Filtered]@example.com"