@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467
467
)
468
468
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469
469
470
+ NO_QUERY_PLUS_SCHEMES = set ()
470
471
471
- def register_scheme (text , uses_netloc = True , default_port = None ):
472
- # type: (Text, bool, Optional[int]) -> None
472
+
473
+ def register_scheme (
474
+ text , uses_netloc = True , default_port = None , query_plus_is_space = True
475
+ ):
476
+ # type: (Text, bool, Optional[int], bool) -> None
473
477
"""Registers new scheme information, resulting in correct port and
474
478
slash behavior from the URL object. There are dozens of standard
475
479
schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485
489
not. Defaults to True.
486
490
default_port: The default port, if any, for
487
491
netloc-using schemes.
492
+ query_plus_is_space: If true, a "+" in the query string should be
493
+ decoded as a space by DecodedURL.
488
494
489
495
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
490
496
"""
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510
516
else :
511
517
raise ValueError ("uses_netloc expected bool, not: %r" % uses_netloc )
512
518
519
+ if not query_plus_is_space :
520
+ NO_QUERY_PLUS_SCHEMES .add (text )
521
+
513
522
return
514
523
515
524
@@ -1969,6 +1978,16 @@ def remove(
1969
1978
_EMPTY_URL = URL ()
1970
1979
1971
1980
1981
+ def _replace_plus (text ):
1982
+ # type: (Text) -> Text
1983
+ return text .replace ("+" , "%20" )
1984
+
1985
+
1986
+ def _no_op (text ):
1987
+ # type: (Text) -> Text
1988
+ return text
1989
+
1990
+
1972
1991
class DecodedURL (object ):
1973
1992
"""
1974
1993
:class:`DecodedURL` is a type designed to act as a higher-level
@@ -1998,6 +2017,9 @@ class DecodedURL(object):
1998
2017
lazy: Set to True to avoid pre-decode all parts of the URL to check for
1999
2018
validity.
2000
2019
Defaults to False.
2020
+ query_plus_is_space: + characters in the query string should be treated
2021
+ as spaces when decoding. If unspecified, the default is taken from
2022
+ the scheme.
2001
2023
2002
2024
.. note::
2003
2025
@@ -2012,18 +2034,21 @@ class DecodedURL(object):
2012
2034
.. versionadded:: 18.0.0
2013
2035
"""
2014
2036
2015
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    """Wrap *url* and record how ``+`` in its query string is decoded.

    Args:
        url: The underlying URL object to wrap.
        lazy: If false (the default), every decodable field is read once
            up front so that decoding problems surface at construction.
        query_plus_is_space: Whether ``+`` in the query string is treated
            as a space when decoding.  If None, the value is derived
            from the URL's scheme.
    """
    self._url = url
    if query_plus_is_space is None:
        # No explicit choice was made: "+" means space unless the scheme
        # has been recorded in NO_QUERY_PLUS_SCHEMES.
        query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
    self._query_plus_is_space = query_plus_is_space
    if not lazy:
        # cache the following, while triggering any decoding
        # issues with decodable fields
        self.host, self.userinfo, self.path, self.query, self.fragment
    return
2023
2048
2024
2049
@classmethod
2025
- def from_text (cls , text , lazy = False ):
2026
- # type: (Text, bool) -> DecodedURL
2050
+ def from_text (cls , text , lazy = False , query_plus_is_space = None ):
2051
+ # type: (Text, bool, Optional[bool] ) -> DecodedURL
2027
2052
"""\
2028
2053
Make a `DecodedURL` instance from any text string containing a URL.
2029
2054
@@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
2034
2059
Defaults to True.
2035
2060
"""
2036
2061
_url = URL .from_text (text )
2037
- return cls (_url , lazy = lazy )
2062
+ return cls (_url , lazy = lazy , query_plus_is_space = query_plus_is_space )
2038
2063
2039
2064
@property
2040
2065
def encoded_url (self ):
@@ -2059,22 +2084,34 @@ def to_iri(self):
2059
2084
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
2060
2085
return self ._url .to_iri ()
2061
2086
2087
def _clone(self, url):
    # type: (URL) -> DecodedURL
    """Build a new instance of this object's class around *url*.

    The new instance is given this instance's ``query_plus_is_space``
    setting, so the choice of how ``+`` is decoded carries over to
    objects derived from this one.
    """
    return self.__class__(
        url,
        # TODO: propagate laziness?
        query_plus_is_space=self._query_plus_is_space,
    )
2094
+
2062
2095
def click(self, href=u""):
    # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
    """Return a new DecodedURL wrapping the result of
    :meth:`~hyperlink.URL.click()`

    A `DecodedURL` passed as *href* is unwrapped to its underlying URL
    before being handed on.  The result is built with ``_clone`` so it
    keeps this object's ``query_plus_is_space`` setting.
    """
    if isinstance(href, DecodedURL):
        href = href._url
    return self._clone(
        self._url.click(href=href),
    )
2070
2105
2071
2106
def sibling(self, segment):
    # type: (Text) -> DecodedURL
    """Automatically encode any reserved characters in *segment* and
    return a new `DecodedURL` wrapping the result of
    :meth:`~hyperlink.URL.sibling()`

    The result is built with ``_clone`` so it keeps this object's
    ``query_plus_is_space`` setting.
    """
    return self._clone(
        self._url.sibling(_encode_reserved(segment)),
    )
2078
2115
2079
2116
def child (self , * segments ):
2080
2117
# type: (Text) -> DecodedURL
@@ -2085,7 +2122,7 @@ def child(self, *segments):
2085
2122
if not segments :
2086
2123
return self
2087
2124
new_segs = [_encode_reserved (s ) for s in segments ]
2088
- return self .__class__ (self ._url .child (* new_segs ))
2125
+ return self ._clone (self ._url .child (* new_segs ))
2089
2126
2090
2127
def normalize (
2091
2128
self ,
@@ -2101,7 +2138,7 @@ def normalize(
2101
2138
"""Return a new `DecodedURL` wrapping the result of
2102
2139
:meth:`~hyperlink.URL.normalize()`
2103
2140
"""
2104
- return self .__class__ (
2141
+ return self ._clone (
2105
2142
self ._url .normalize (
2106
2143
scheme , host , path , query , fragment , userinfo , percents
2107
2144
)
@@ -2148,11 +2185,18 @@ def path(self):
2148
2185
def query (self ):
2149
2186
# type: () -> QueryPairs
2150
2187
if not hasattr (self , "_query" ):
2188
+ if self ._query_plus_is_space :
2189
+ predecode = _replace_plus
2190
+ else :
2191
+ predecode = _no_op
2192
+
2151
2193
self ._query = cast (
2152
2194
QueryPairs ,
2153
2195
tuple (
2154
2196
tuple (
2155
- _percent_decode (x , raise_subencoding_exc = True )
2197
+ _percent_decode (
2198
+ predecode (x ), raise_subencoding_exc = True
2199
+ )
2156
2200
if x is not None
2157
2201
else None
2158
2202
for x in (k , v )
@@ -2248,7 +2292,7 @@ def replace(
2248
2292
userinfo = userinfo_text ,
2249
2293
uses_netloc = uses_netloc ,
2250
2294
)
2251
- return self .__class__ (url = new_url )
2295
+ return self ._clone (url = new_url )
2252
2296
2253
2297
def get (self , name ):
2254
2298
# type: (Text) -> List[Optional[Text]]
0 commit comments