Skip to content

Commit 558d39c

Browse files
miss-islington, sethmlarson, ZeroIntensity, and ambv
authored and committed
[3.9] pythongh-105704: Disallow square brackets ([ and ]) in domain names for parsed URLs (pythonGH-129418) (python#129530)
(cherry picked from commit d89a5f6) Co-authored-by: Seth Michael Larson <[email protected]> Co-authored-by: Peter Bierma <[email protected]> Co-authored-by: Łukasz Langa <[email protected]>
1 parent 3a6c6e0 commit 558d39c

File tree

3 files changed

+56
-3
lines changed

3 files changed

+56
-3
lines changed

Lib/test/test_urlparse.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1173,12 +1173,46 @@ def test_invalid_bracketed_hosts(self):
11731173
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query')
11741174
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query')
11751175
self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path')
1176+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]')
1177+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix')
1178+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]/')
1179+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix/')
1180+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]?')
1181+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix?')
1182+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]')
1183+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix')
1184+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]/')
1185+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/')
1186+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?')
1187+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?')
1188+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a')
1189+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a')
1190+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a1')
1191+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a1')
1192+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:1a')
1193+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:1a')
1194+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:')
1195+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:/')
1196+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:?')
1197+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]')
1198+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix')
1199+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip')
1200+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip]')
1201+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip[')
1202+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip')
1203+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[')
1204+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip')
1205+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip].suffix')
1206+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix')
1207+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip')
1208+
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix')
11761209

11771210
def test_splitting_bracketed_hosts(self):
1178-
p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query')
1211+
p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query')
11791212
self.assertEqual(p1.hostname, 'v6a.ip')
11801213
self.assertEqual(p1.username, 'user')
11811214
self.assertEqual(p1.path, '/path')
1215+
self.assertEqual(p1.port, 1234)
11821216

11831217
def test_port_casting_failure_message(self):
11841218
message = "Port could not be cast to integer value as 'oracle'"

Lib/urllib/parse.py

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,22 @@ def _remove_unsafe_bytes_from_url(url):
431431
url = url.replace(b, "")
432432
return url
433433

434+
def _check_bracketed_netloc(netloc):
435+
# Note that this function must mirror the splitting
436+
# done in NetlocResultMixins._hostinfo().
437+
hostname_and_port = netloc.rpartition('@')[2]
438+
before_bracket, have_open_br, bracketed = hostname_and_port.partition('[')
439+
if have_open_br:
440+
# No data is allowed before a bracket.
441+
if before_bracket:
442+
raise ValueError("Invalid IPv6 URL")
443+
hostname, _, port = bracketed.partition(']')
444+
# No data is allowed after the bracket but before the port delimiter.
445+
if port and not port.startswith(":"):
446+
raise ValueError("Invalid IPv6 URL")
447+
else:
448+
hostname, _, port = hostname_and_port.partition(':')
449+
_check_bracketed_host(hostname)
434450

435451
# Valid bracketed hosts are defined in
436452
# https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/
@@ -498,8 +514,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
498514
(']' in netloc and '[' not in netloc)):
499515
raise ValueError("Invalid IPv6 URL")
500516
if '[' in netloc and ']' in netloc:
501-
bracketed_host = netloc.partition('[')[2].partition(']')[0]
502-
_check_bracketed_host(bracketed_host)
517+
_check_bracketed_netloc(netloc)
503518
if allow_fragments and '#' in url:
504519
url, fragment = url.split('#', 1)
505520
if '?' in url:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
When using :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse`, host
2+
parsing would not reject domain names containing square brackets (``[`` and
3+
``]``). Square brackets are only valid for IPv6 and IPvFuture hosts according to
4+
`RFC 3986 Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__.

0 commit comments

Comments
 (0)