Skip to content
This repository was archived by the owner on Nov 20, 2018. It is now read-only.

Commit 3627a8e

Browse files
UrlEncoder should always encode the U+003A COLON character
Provides extra defense-in-depth in case an application uses this API to encode a relative URL; otherwise, the part before the colon could inadvertently be treated as a scheme.
1 parent 08ddbe8 commit 3627a8e

File tree

2 files changed

+22
-7
lines changed

2 files changed

+22
-7
lines changed

src/Microsoft.Framework.WebEncoders.Core/UrlEncoder.cs

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,12 +131,24 @@ internal UrlUnicodeEncoder(CodePointFilter filter)
131131
: base(filter, MaxOutputCharsPerInputChar)
132132
{
133133
// Per RFC 3987, Sec. 2.2, we want encodings that are safe for
134-
// 'isegment', 'iquery', and 'ifragment'. The only thing these
135-
// all have in common is 'ipchar', which is defined as such:
134+
// four particular components: 'isegment', 'ipath-noscheme',
135+
// 'iquery', and 'ifragment'. The relevant definitions are below.
136+
//
137+
// ipath-noscheme = isegment-nz-nc *( "/" isegment )
138+
//
139+
// isegment = *ipchar
140+
//
141+
// isegment-nz-nc = 1*( iunreserved / pct-encoded / sub-delims
142+
// / "@" )
143+
// ; non-zero-length segment without any colon ":"
136144
//
137145
// ipchar = iunreserved / pct-encoded / sub-delims / ":"
138146
// / "@"
139147
//
148+
// iquery = *( ipchar / iprivate / "/" / "?" )
149+
//
150+
// ifragment = *( ipchar / "/" / "?" )
151+
//
140152
// iunreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" / ucschar
141153
//
142154
// ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
@@ -151,15 +163,19 @@ internal UrlUnicodeEncoder(CodePointFilter filter)
151163
// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
152164
// / "*" / "+" / "," / ";" / "="
153165
//
154-
// From this list, the base encoder forbids "&", "'", "+",
166+
// The only common characters between these four components are the
167+
// intersection of 'isegment-nz-nc' and 'ipchar', which is really
168+
// just 'isegment-nz-nc' (colons forbidden).
169+
//
170+
// From this list, the base encoder already forbids "&", "'", "+",
155171
// and we'll additionally forbid "=" since it has special meaning
156172
// in x-www-form-urlencoded representations.
157173
//
158174
// This means that the full list of allowed characters from the
159175
// Basic Latin set is:
160-
// ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / ":" / "@"
176+
// ALPHA / DIGIT / "-" / "." / "_" / "~" / "!" / "$" / "(" / ")" / "*" / "," / ";" / "@"
161177

162-
const string forbiddenChars = @" #%/=?[\]^`{|}"; // chars from Basic Latin which aren't already disallowed by the base encoder
178+
const string forbiddenChars = @" #%/:=?[\]^`{|}"; // chars from Basic Latin which aren't already disallowed by the base encoder
163179
foreach (char c in forbiddenChars)
164180
{
165181
ForbidCharacter(c);

test/Microsoft.Framework.WebEncoders.Tests/UrlEncoderTests.cs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,7 @@ public void UrlEncode_AllRangesAllowed_StillEncodesForbiddenChars()
123123
case '_':
124124
case '~':
125125

126-
// ipchar
127-
case ':':
126+
// isegment-nz-nc
128127
case '@':
129128

130129
// sub-delims

0 commit comments

Comments
 (0)