|
1 | 1 | // Copyright (c) .NET Foundation. All rights reserved.
|
2 | 2 | // Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
|
3 | 3 |
|
4 |
| -using System.Buffers; |
| 4 | +using System; |
| 5 | +using System.Diagnostics; |
5 | 6 |
|
6 | 7 | namespace Microsoft.AspNetCore.Server.Kestrel.Internal.Http
|
7 | 8 | {
|
8 | 9 | public static class PathNormalizer
|
9 | 10 | {
|
10 |
| - public static string RemoveDotSegments(string path) |
| 11 | + private const byte ByteSlash = (byte)'/'; |
| 12 | + private const byte ByteDot = (byte)'.'; |
| 13 | + |
| 14 | + // In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4 |
/// <summary>
/// Pins <paramref name="input"/> and removes dot segments from it in place by
/// delegating to the pointer-based <c>RemoveDotSegments(byte*, byte*)</c> overload.
/// </summary>
/// <param name="input">The bytes of the path to normalize; the buffer is rewritten in place.</param>
/// <returns>The length value returned by the pointer-based overload for the pinned range.</returns>
public static unsafe int RemoveDotSegments(Span<byte> input)
{
    // The span must stay pinned for as long as raw pointers into it are in use.
    fixed (byte* first = &input.DangerousGetPinnableReference())
    {
        // The second argument is the exclusive end of the range (one past the last byte).
        return RemoveDotSegments(first, first + input.Length);
    }
}
| 23 | + |
/// <summary>
/// Removes "." and ".." path segments from the bytes in [<paramref name="start"/>, <paramref name="end"/>),
/// rewriting the buffer in place using the algorithm from RFC 3986 section 5.2.4.
/// </summary>
/// <param name="start">Pointer to the first byte of the path. Every segment is expected to begin with '/'.</param>
/// <param name="end">Pointer one past the last byte of the path.</param>
/// <returns>
/// The length, in bytes, of the normalized path, which begins at <paramref name="start"/>.
/// When the path contains no dot segments the buffer is left untouched and the original length is returned.
/// </returns>
public static unsafe int RemoveDotSegments(byte* start, byte* end)
{
    // Fast path: nothing to rewrite.
    if (!ContainsDotSegments(start, end))
    {
        return (int)(end - start);
    }

    // src is the read cursor ("input buffer"), dst the write cursor ("output buffer").
    // dst never gets ahead of src, so rewriting in place is safe.
    var src = start;
    var dst = start;

    while (src < end)
    {
        Debug.Assert(*src == '/', "Path segment must always start with a '/'");

        byte ch2, ch3, ch4;

        // Only look ahead as far as the remaining input allows.
        switch (end - src)
        {
            case 1:
                // A lone trailing "/" is copied as-is by step E below.
                break;
            case 2:
                ch2 = *(src + 1);

                if (ch2 == ByteDot)
                {
                    // B.  if the input buffer begins with a prefix of "/./" or "/.",
                    //     where "." is a complete path segment, then replace that
                    //     prefix with "/" in the input buffer; otherwise,
                    src += 1;
                    *src = ByteSlash;
                    continue;
                }

                break;
            case 3:
                ch2 = *(src + 1);
                ch3 = *(src + 2);

                if (ch2 == ByteDot && ch3 == ByteDot)
                {
                    // C.  if the input buffer begins with a prefix of "/../" or "/..",
                    //     where ".." is a complete path segment, then replace that
                    //     prefix with "/" in the input buffer and remove the last
                    //     segment and its preceding "/" (if any) from the output
                    //     buffer; otherwise,
                    src += 2;
                    *src = ByteSlash;

                    if (dst > start)
                    {
                        // Rewind the output to the '/' that starts its last segment.
                        do
                        {
                            dst--;
                        } while (dst > start && *dst != ByteSlash);
                    }

                    continue;
                }
                else if (ch2 == ByteDot && ch3 == ByteSlash)
                {
                    // B.  if the input buffer begins with a prefix of "/./" or "/.",
                    //     where "." is a complete path segment, then replace that
                    //     prefix with "/" in the input buffer; otherwise,
                    src += 2;
                    continue;
                }

                break;
            default:
                ch2 = *(src + 1);
                ch3 = *(src + 2);
                ch4 = *(src + 3);

                if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash)
                {
                    // C.  if the input buffer begins with a prefix of "/../" or "/..",
                    //     where ".." is a complete path segment, then replace that
                    //     prefix with "/" in the input buffer and remove the last
                    //     segment and its preceding "/" (if any) from the output
                    //     buffer; otherwise,
                    src += 3;

                    if (dst > start)
                    {
                        // Rewind the output to the '/' that starts its last segment.
                        do
                        {
                            dst--;
                        } while (dst > start && *dst != ByteSlash);
                    }

                    continue;
                }
                else if (ch2 == ByteDot && ch3 == ByteSlash)
                {
                    // B.  if the input buffer begins with a prefix of "/./" or "/.",
                    //     where "." is a complete path segment, then replace that
                    //     prefix with "/" in the input buffer; otherwise,
                    src += 2;
                    continue;
                }

                break;
        }

        // E.  move the first path segment in the input buffer to the end of
        //     the output buffer, including the initial "/" character (if
        //     any) and any subsequent characters up to, but not including,
        //     the next "/" character or the end of the input buffer.
        //
        // The bound is tested before *src is dereferenced so that the byte
        // at 'end' (one past the buffer) is never read.
        do
        {
            *dst++ = *src++;
        } while (src < end && *src != ByteSlash);
    }

    // If nothing was emitted, the normalized path is the root "/".
    if (dst == start)
    {
        *dst++ = ByteSlash;
    }

    return (int)(dst - start);
}
|
67 | 147 |
|
68 |
| - private unsafe static bool ContainsDotSegments(string path) |
| 148 | + public static unsafe bool ContainsDotSegments(byte* start, byte* end) |
69 | 149 | {
|
70 |
| - fixed (char* ptr = path) |
71 |
| - { |
72 |
| - char* end = ptr + path.Length; |
| 150 | + var src = start; |
| 151 | + var dst = start; |
73 | 152 |
|
74 |
| - for (char* p = ptr; p < end; p++) |
75 |
| - { |
76 |
| - if (*p == '/') |
77 |
| - { |
78 |
| - p++; |
79 |
| - } |
| 153 | + while (src < end) |
| 154 | + { |
| 155 | + var ch1 = *src; |
| 156 | + Debug.Assert(ch1 == '/', "Path segment must always start with a '/'"); |
80 | 157 |
|
81 |
| - if (p == end) |
82 |
| - { |
83 |
| - return false; |
84 |
| - } |
| 158 | + byte ch2, ch3, ch4; |
85 | 159 |
|
86 |
| - if (*p == '.') |
87 |
| - { |
88 |
| - p++; |
| 160 | + switch (end - src) |
| 161 | + { |
| 162 | + case 1: |
| 163 | + break; |
| 164 | + case 2: |
| 165 | + ch2 = *(src + 1); |
89 | 166 |
|
90 |
| - if (p == end) |
| 167 | + if (ch2 == ByteDot) |
91 | 168 | {
|
92 | 169 | return true;
|
93 | 170 | }
|
94 | 171 |
|
95 |
| - if (*p == '.') |
| 172 | + break; |
| 173 | + case 3: |
| 174 | + ch2 = *(src + 1); |
| 175 | + ch3 = *(src + 2); |
| 176 | + |
| 177 | + if ((ch2 == ByteDot && ch3 == ByteDot) || |
| 178 | + (ch2 == ByteDot && ch3 == ByteSlash)) |
96 | 179 | {
|
97 |
| - p++; |
| 180 | + return true; |
| 181 | + } |
98 | 182 |
|
99 |
| - if (p == end) |
100 |
| - { |
101 |
| - return true; |
102 |
| - } |
| 183 | + break; |
| 184 | + default: |
| 185 | + ch2 = *(src + 1); |
| 186 | + ch3 = *(src + 2); |
| 187 | + ch4 = *(src + 3); |
103 | 188 |
|
104 |
| - if (*p == '/') |
105 |
| - { |
106 |
| - return true; |
107 |
| - } |
108 |
| - } |
109 |
| - else if (*p == '/') |
| 189 | + if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) || |
| 190 | + (ch2 == ByteDot && ch3 == ByteSlash)) |
110 | 191 | {
|
111 | 192 | return true;
|
112 | 193 | }
|
113 |
| - } |
| 194 | + |
| 195 | + break; |
114 | 196 | }
|
| 197 | + |
| 198 | + do |
| 199 | + { |
| 200 | + ch1 = *++src; |
| 201 | + } while (src < end && ch1 != ByteSlash); |
115 | 202 | }
|
116 | 203 |
|
117 | 204 | return false;
|
|
0 commit comments