Skip to content

Commit 2ef3804

Browse files
author
Cesar Blum Silveira
authored
Span-based RemoveDotSegments (#1448)
1 parent 9a4a810 commit 2ef3804

File tree

6 files changed

+323
-108
lines changed

6 files changed

+323
-108
lines changed

src/Microsoft.AspNetCore.Server.Kestrel/Internal/Http/Frame.cs

Lines changed: 26 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,29 +1253,42 @@ private void OnOriginFormTarget(HttpMethod method, HttpVersion version, Span<byt
12531253

12541254
try
12551255
{
1256+
// Read raw target before mutating memory.
1257+
rawTarget = target.GetAsciiStringNonNullCharacters();
1258+
12561259
if (pathEncoded)
12571260
{
1258-
// Read raw target before mutating memory.
1259-
rawTarget = target.GetAsciiStringNonNullCharacters();
1261+
// URI was encoded, unescape and then parse as UTF-8
1262+
var pathLength = UrlEncoder.Decode(path, path);
1263+
1264+
// Removing dot segments must be done after unescaping. From RFC 3986:
1265+
//
1266+
// URI producing applications should percent-encode data octets that
1267+
// correspond to characters in the reserved set unless these characters
1268+
// are specifically allowed by the URI scheme to represent data in that
1269+
// component. If a reserved character is found in a URI component and
1270+
// no delimiting role is known for that character, then it must be
1271+
// interpreted as representing the data octet corresponding to that
1272+
// character's encoding in US-ASCII.
1273+
//
1274+
// https://tools.ietf.org/html/rfc3986#section-2.2
1275+
pathLength = PathNormalizer.RemoveDotSegments(path.Slice(0, pathLength));
12601276

1261-
// URI was encoded, unescape and then parse as utf8
1262-
int pathLength = UrlEncoder.Decode(path, path);
12631277
requestUrlPath = GetUtf8String(path.Slice(0, pathLength));
12641278
}
12651279
else
12661280
{
1267-
// URI wasn't encoded, parse as ASCII
1268-
requestUrlPath = path.GetAsciiStringNonNullCharacters();
1281+
var pathLength = PathNormalizer.RemoveDotSegments(path);
12691282

1270-
if (query.Length == 0)
1283+
if (path.Length == pathLength && query.Length == 0)
12711284
{
1272-
// No need to allocate an extra string if the path didn't need
1273-
// decoding and there's no query string following it.
1274-
rawTarget = requestUrlPath;
1285+
// If no decoding was required, no dot segments were removed and
1286+
// there is no query, the request path is the same as the raw target
1287+
requestUrlPath = rawTarget;
12751288
}
12761289
else
12771290
{
1278-
rawTarget = target.GetAsciiStringNonNullCharacters();
1291+
requestUrlPath = path.Slice(0, pathLength).GetAsciiStringNonNullCharacters();
12791292
}
12801293
}
12811294
}
@@ -1286,7 +1299,7 @@ private void OnOriginFormTarget(HttpMethod method, HttpVersion version, Span<byt
12861299

12871300
QueryString = query.GetAsciiStringNonNullCharacters();
12881301
RawTarget = rawTarget;
1289-
Path = PathNormalizer.RemoveDotSegments(requestUrlPath);
1302+
Path = requestUrlPath;
12901303
}
12911304

12921305
private void OnAuthorityFormTarget(HttpMethod method, Span<byte> target)
@@ -1360,7 +1373,7 @@ private void OnAbsoluteFormTarget(Span<byte> target, Span<byte> query)
13601373
RejectRequestTarget(target);
13611374
}
13621375

1363-
Path = PathNormalizer.RemoveDotSegments(uri.LocalPath);
1376+
Path = uri.LocalPath;
13641377
// don't use uri.Query because we need the unescaped version
13651378
QueryString = query.GetAsciiStringNonNullCharacters();
13661379
}

src/Microsoft.AspNetCore.Server.Kestrel/Internal/Http/PathNormalizer.cs

Lines changed: 154 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,117 +1,204 @@
11
// Copyright (c) .NET Foundation. All rights reserved.
22
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
33

4-
using System.Buffers;
4+
using System;
5+
using System.Diagnostics;
56

67
namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Http
78
{
89
public static class PathNormalizer
910
{
10-
public static string RemoveDotSegments(string path)
11+
private const byte ByteSlash = (byte)'/';
12+
private const byte ByteDot = (byte)'.';
13+
14+
// In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4
15+
/// <summary>
/// Removes dot segments from <paramref name="input"/> by pinning the span and
/// delegating to the pointer-based <c>RemoveDotSegments(byte*, byte*)</c> overload.
/// </summary>
/// <param name="input">Buffer holding the path bytes; the pointer-based overload may rewrite it.</param>
/// <returns>The length, in bytes, reported by the pointer-based overload.</returns>
public static unsafe int RemoveDotSegments(Span<byte> input)
{
    // Pin the span's memory so raw pointers into it stay valid for the duration of the call.
    fixed (byte* first = &input.DangerousGetPinnableReference())
    {
        return RemoveDotSegments(first, first + input.Length);
    }
}
23+
24+
/// <summary>
/// Removes "." and ".." path segments from the bytes in the range
/// [<paramref name="start"/>, <paramref name="end"/>), rewriting the buffer in place
/// using the steps of RFC 3986 section 5.2.4.
/// </summary>
/// <param name="start">Pointer to the first byte of the path. Every segment is expected to begin with '/'.</param>
/// <param name="end">Pointer one past the last byte of the path. This byte is never read.</param>
/// <returns>
/// The length, in bytes, of the normalized path, which begins at <paramref name="start"/>.
/// Bytes beyond the returned length are not cleared.
/// </returns>
public static unsafe int RemoveDotSegments(byte* start, byte* end)
{
    // Fast path: nothing to rewrite, so the buffer is left untouched.
    if (!ContainsDotSegments(start, end))
    {
        return (int)(end - start);
    }

    // src walks the unprocessed input; dst marks the end of the output written so far.
    // Both share the same buffer, and dst never gets ahead of src.
    var src = start;
    var dst = start;

    while (src < end)
    {
        var ch1 = *src;
        Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");

        byte ch2, ch3, ch4;

        // Only look ahead as far as the remaining input allows.
        switch (end - src)
        {
            case 1:
                break;
            case 2:
                ch2 = *(src + 1);

                if (ch2 == ByteDot)
                {
                    // B. if the input buffer begins with a prefix of "/./" or "/.",
                    //    where "." is a complete path segment, then replace that
                    //    prefix with "/" in the input buffer; otherwise,
                    src += 1;
                    *src = ByteSlash;
                    continue;
                }

                break;
            case 3:
                ch2 = *(src + 1);
                ch3 = *(src + 2);

                if (ch2 == ByteDot && ch3 == ByteDot)
                {
                    // C. if the input buffer begins with a prefix of "/../" or "/..",
                    //    where ".." is a complete path segment, then replace that
                    //    prefix with "/" in the input buffer and remove the last
                    //    segment and its preceding "/" (if any) from the output
                    //    buffer; otherwise,
                    src += 2;
                    *src = ByteSlash;

                    // Drop the last output segment by backing dst up to its leading '/'.
                    if (dst > start)
                    {
                        do
                        {
                            dst--;
                        } while (dst > start && *dst != ByteSlash);
                    }

                    continue;
                }
                else if (ch2 == ByteDot && ch3 == ByteSlash)
                {
                    // B. if the input buffer begins with a prefix of "/./" or "/.",
                    //    where "." is a complete path segment, then replace that
                    //    prefix with "/" in the input buffer; otherwise,
                    src += 2;
                    continue;
                }

                break;
            default:
                ch2 = *(src + 1);
                ch3 = *(src + 2);
                ch4 = *(src + 3);

                if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash)
                {
                    // C. if the input buffer begins with a prefix of "/../" or "/..",
                    //    where ".." is a complete path segment, then replace that
                    //    prefix with "/" in the input buffer and remove the last
                    //    segment and its preceding "/" (if any) from the output
                    //    buffer; otherwise,
                    src += 3;

                    // Drop the last output segment by backing dst up to its leading '/'.
                    if (dst > start)
                    {
                        do
                        {
                            dst--;
                        } while (dst > start && *dst != ByteSlash);
                    }

                    continue;
                }
                else if (ch2 == ByteDot && ch3 == ByteSlash)
                {
                    // B. if the input buffer begins with a prefix of "/./" or "/.",
                    //    where "." is a complete path segment, then replace that
                    //    prefix with "/" in the input buffer; otherwise,
                    src += 2;
                    continue;
                }

                break;
        }

        // E. move the first path segment in the input buffer to the end of
        //    the output buffer, including the initial "/" character (if
        //    any) and any subsequent characters up to, but not including,
        //    the next "/" character or the end of the input buffer.
        do
        {
            *dst++ = ch1;
            src++;

            // Read the next byte only after confirming src is still inside the
            // buffer, so the byte at 'end' is never dereferenced.
        } while (src < end && (ch1 = *src) != ByteSlash);
    }

    // Defensive: if every segment was removed, the normalized path is "/".
    if (dst == start)
    {
        *dst++ = ByteSlash;
    }

    return (int)(dst - start);
}
67147

68-
private unsafe static bool ContainsDotSegments(string path)
148+
public static unsafe bool ContainsDotSegments(byte* start, byte* end)
69149
{
70-
fixed (char* ptr = path)
71-
{
72-
char* end = ptr + path.Length;
150+
var src = start;
151+
var dst = start;
73152

74-
for (char* p = ptr; p < end; p++)
75-
{
76-
if (*p == '/')
77-
{
78-
p++;
79-
}
153+
while (src < end)
154+
{
155+
var ch1 = *src;
156+
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");
80157

81-
if (p == end)
82-
{
83-
return false;
84-
}
158+
byte ch2, ch3, ch4;
85159

86-
if (*p == '.')
87-
{
88-
p++;
160+
switch (end - src)
161+
{
162+
case 1:
163+
break;
164+
case 2:
165+
ch2 = *(src + 1);
89166

90-
if (p == end)
167+
if (ch2 == ByteDot)
91168
{
92169
return true;
93170
}
94171

95-
if (*p == '.')
172+
break;
173+
case 3:
174+
ch2 = *(src + 1);
175+
ch3 = *(src + 2);
176+
177+
if ((ch2 == ByteDot && ch3 == ByteDot) ||
178+
(ch2 == ByteDot && ch3 == ByteSlash))
96179
{
97-
p++;
180+
return true;
181+
}
98182

99-
if (p == end)
100-
{
101-
return true;
102-
}
183+
break;
184+
default:
185+
ch2 = *(src + 1);
186+
ch3 = *(src + 2);
187+
ch4 = *(src + 3);
103188

104-
if (*p == '/')
105-
{
106-
return true;
107-
}
108-
}
109-
else if (*p == '/')
189+
if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) ||
190+
(ch2 == ByteDot && ch3 == ByteSlash))
110191
{
111192
return true;
112193
}
113-
}
194+
195+
break;
114196
}
197+
198+
do
199+
{
200+
ch1 = *++src;
201+
} while (src < end && ch1 != ByteSlash);
115202
}
116203

117204
return false;

0 commit comments

Comments
 (0)