Skip to content

Commit ad88c47

Browse files
committed
make hyperlink handle + like an HTML form post by default
1 parent e5cd7e2 commit ad88c47

File tree

3 files changed

+84
-14
lines changed

3 files changed

+84
-14
lines changed

src/hyperlink/_url.py

+57-13
Original file line numberDiff line numberDiff line change
@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467467
)
468468
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469

470+
NO_QUERY_PLUS_SCHEMES = set()
470471

471-
def register_scheme(text, uses_netloc=True, default_port=None):
472-
# type: (Text, bool, Optional[int]) -> None
472+
473+
def register_scheme(
474+
text, uses_netloc=True, default_port=None, query_plus_is_space=True
475+
):
476+
# type: (Text, bool, Optional[int], bool) -> None
473477
"""Registers new scheme information, resulting in correct port and
474478
slash behavior from the URL object. There are dozens of standard
475479
schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485489
not. Defaults to True.
486490
default_port: The default port, if any, for
487491
netloc-using schemes.
492+
query_plus_is_space: If true, a "+" in the query string should be
493+
decoded as a space by DecodedURL. Defaults to True.
488494
489495
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
490496
"""
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510516
else:
511517
raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
512518

519+
if not query_plus_is_space:
520+
NO_QUERY_PLUS_SCHEMES.add(text)
521+
513522
return
514523

515524

@@ -1969,6 +1978,16 @@ def remove(
19691978
_EMPTY_URL = URL()
19701979

19711980

1981+
def _replace_plus(text):
1982+
# type: (Text) -> Text
1983+
return text.replace("+", "%20")
1984+
1985+
1986+
def _no_op(text):
1987+
# type: (Text) -> Text
1988+
return text
1989+
1990+
19721991
class DecodedURL(object):
19731992
"""
19741993
:class:`DecodedURL` is a type designed to act as a higher-level
@@ -1998,6 +2017,9 @@ class DecodedURL(object):
19982017
lazy: Set to True to avoid pre-decoding all parts of the URL to check for
19992018
validity.
20002019
Defaults to False.
2020+
query_plus_is_space: If true, "+" characters in the query string are treated
2021+
as spaces when decoding. If unspecified, the default is taken from
2022+
the scheme.
20012023
20022024
.. note::
20032025
@@ -2012,18 +2034,21 @@ class DecodedURL(object):
20122034
.. versionadded:: 18.0.0
20132035
"""
20142036

2015-
def __init__(self, url=_EMPTY_URL, lazy=False):
2016-
# type: (URL, bool) -> None
2037+
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
2038+
# type: (URL, bool, Optional[bool]) -> None
20172039
self._url = url
2040+
if query_plus_is_space is None:
2041+
query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
2042+
self._query_plus_is_space = query_plus_is_space
20182043
if not lazy:
20192044
# cache the following, while triggering any decoding
20202045
# issues with decodable fields
20212046
self.host, self.userinfo, self.path, self.query, self.fragment
20222047
return
20232048

20242049
@classmethod
2025-
def from_text(cls, text, lazy=False):
2026-
# type: (Text, bool) -> DecodedURL
2050+
def from_text(cls, text, lazy=False, query_plus_is_space=None):
2051+
# type: (Text, bool, Optional[bool]) -> DecodedURL
20272052
"""\
20282053
Make a `DecodedURL` instance from any text string containing a URL.
20292054
@@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
20342059
Defaults to True.
20352060
"""
20362061
_url = URL.from_text(text)
2037-
return cls(_url, lazy=lazy)
2062+
return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
20382063

20392064
@property
20402065
def encoded_url(self):
@@ -2059,22 +2084,34 @@ def to_iri(self):
20592084
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602085
return self._url.to_iri()
20612086

2087+
def _clone(self, url):
2088+
# type: (URL) -> DecodedURL
2089+
return self.__class__(
2090+
url,
2091+
# TODO: propagate laziness?
2092+
query_plus_is_space=self._query_plus_is_space,
2093+
)
2094+
20622095
def click(self, href=u""):
20632096
# type: (Union[Text, URL, DecodedURL]) -> DecodedURL
20642097
"""Return a new DecodedURL wrapping the result of
20652098
:meth:`~hyperlink.URL.click()`
20662099
"""
20672100
if isinstance(href, DecodedURL):
20682101
href = href._url
2069-
return self.__class__(self._url.click(href=href))
2102+
return self._clone(
2103+
self._url.click(href=href),
2104+
)
20702105

20712106
def sibling(self, segment):
20722107
# type: (Text) -> DecodedURL
20732108
"""Automatically encode any reserved characters in *segment* and
20742109
return a new `DecodedURL` wrapping the result of
20752110
:meth:`~hyperlink.URL.sibling()`
20762111
"""
2077-
return self.__class__(self._url.sibling(_encode_reserved(segment)))
2112+
return self._clone(
2113+
self._url.sibling(_encode_reserved(segment)),
2114+
)
20782115

20792116
def child(self, *segments):
20802117
# type: (Text) -> DecodedURL
@@ -2085,7 +2122,7 @@ def child(self, *segments):
20852122
if not segments:
20862123
return self
20872124
new_segs = [_encode_reserved(s) for s in segments]
2088-
return self.__class__(self._url.child(*new_segs))
2125+
return self._clone(self._url.child(*new_segs))
20892126

20902127
def normalize(
20912128
self,
@@ -2101,7 +2138,7 @@ def normalize(
21012138
"""Return a new `DecodedURL` wrapping the result of
21022139
:meth:`~hyperlink.URL.normalize()`
21032140
"""
2104-
return self.__class__(
2141+
return self._clone(
21052142
self._url.normalize(
21062143
scheme, host, path, query, fragment, userinfo, percents
21072144
)
@@ -2148,11 +2185,18 @@ def path(self):
21482185
def query(self):
21492186
# type: () -> QueryPairs
21502187
if not hasattr(self, "_query"):
2188+
if self._query_plus_is_space:
2189+
predecode = _replace_plus
2190+
else:
2191+
predecode = _no_op
2192+
21512193
self._query = cast(
21522194
QueryPairs,
21532195
tuple(
21542196
tuple(
2155-
_percent_decode(x, raise_subencoding_exc=True)
2197+
_percent_decode(
2198+
predecode(x), raise_subencoding_exc=True
2199+
)
21562200
if x is not None
21572201
else None
21582202
for x in (k, v)
@@ -2248,7 +2292,7 @@ def replace(
22482292
userinfo=userinfo_text,
22492293
uses_netloc=uses_netloc,
22502294
)
2251-
return self.__class__(url=new_url)
2295+
return self._clone(url=new_url)
22522296

22532297
def get(self, name):
22542298
# type: (Text) -> List[Optional[Text]]

src/hyperlink/test/test_decoded_url.py

+16
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,19 @@ def test_click_decoded_url(self):
210210
assert clicked.host == durl.host
211211
assert clicked.path == durl_dest.path
212212
assert clicked.path == ("tëst",)
213+
214+
def test_decode_plus(self):
215+
# type: () -> None
216+
durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
217+
assert durl.path == ("x+y+",)
218+
assert durl.get("a") == ["b c+"]
219+
assert durl.query == (("a", "b c+"),)
220+
221+
def test_decode_nonplussed(self):
222+
# type: () -> None
223+
durl = DecodedURL.from_text(
224+
"/x+y%2B?a=b+c%2B", query_plus_is_space=False
225+
)
226+
assert durl.path == ("x+y+",)
227+
assert durl.get("a") == ["b+c+"]
228+
assert durl.query == (("a", "b+c+"),)

src/hyperlink/test/test_scheme_registration.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from .. import _url
77
from .common import HyperlinkTestCase
8-
from .._url import register_scheme, URL
8+
from .._url import register_scheme, URL, DecodedURL
99

1010

1111
class TestSchemeRegistration(HyperlinkTestCase):
@@ -70,3 +70,13 @@ def test_register_invalid_port(self):
7070
# type: () -> None
7171
with self.assertRaises(ValueError):
7272
register_scheme("nope", default_port=cast(bool, object()))
73+
74+
def test_register_no_quote_plus_scheme(self):
75+
# type: () -> None
76+
register_scheme("keepplus", query_plus_is_space=False)
77+
plus_is_not_space = DecodedURL.from_text(
78+
"keepplus://example.com/?q=a+b"
79+
)
80+
plus_is_space = DecodedURL.from_text("https://example.com/?q=a+b")
81+
assert plus_is_not_space.get("q") == ["a+b"]
82+
assert plus_is_space.get("q") == ["a b"]

0 commit comments

Comments
 (0)