Skip to content
This repository was archived by the owner on Dec 18, 2018. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions src/Microsoft.AspNet.Server.Kestrel/Http/Frame.cs
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ private bool TakeStartLine(SocketInput input)
{
return false;
}
var method = begin.GetString(scan);
var method = begin.GetAsciiString(scan);

scan.Take();
begin = scan;
Expand All @@ -653,7 +653,7 @@ private bool TakeStartLine(SocketInput input)
{
return false;
}
queryString = begin.GetString(scan);
queryString = begin.GetAsciiString(scan);
}

scan.Take();
Expand All @@ -662,20 +662,29 @@ private bool TakeStartLine(SocketInput input)
{
return false;
}
var httpVersion = begin.GetString(scan);
var httpVersion = begin.GetAsciiString(scan);

scan.Take();
if (scan.Take() != '\n')
{
return false;
}

// URIs are always encoded/escaped to ASCII https://tools.ietf.org/html/rfc3986#page-11
// Multibyte Internationalized Resource Identifiers (IRIs) are first converted to utf8;
// then encoded/escaped to ASCII https://www.ietf.org/rfc/rfc3987.txt "Mapping of IRIs to URIs"
string requestUrlPath;
if (needDecode)
{
// URI was encoded, unescape and then parse as utf8
pathEnd = UrlPathDecoder.Unescape(pathBegin, pathEnd);
requestUrlPath = pathBegin.GetUtf8String(pathEnd);
}
else
{
// URI wasn't encoded, parse as ASCII
requestUrlPath = pathBegin.GetAsciiString(pathEnd);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Always use Utf8 for the path.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Changed and reverted as discussed in outdated diff

}

var requestUrlPath = pathBegin.GetString(pathEnd);

consumed = scan;
Method = method;
Expand All @@ -691,11 +700,6 @@ private bool TakeStartLine(SocketInput input)
}
}

/// <summary>
/// Decodes a slice of <paramref name="range"/> as UTF-8 text.
/// </summary>
/// <param name="range">Segment whose backing array holds the bytes to decode.</param>
/// <param name="startIndex">Inclusive start position, relative to the segment's offset.</param>
/// <param name="endIndex">Exclusive end position, relative to the segment's offset.</param>
/// <returns>The decoded string.</returns>
static string GetString(ArraySegment<byte> range, int startIndex, int endIndex)
{
    // Translate the segment-relative positions into an absolute offset and a byte count.
    var absoluteStart = range.Offset + startIndex;
    var byteCount = endIndex - startIndex;

    return Encoding.UTF8.GetString(range.Array, absoluteStart, byteCount);
}

public static bool TakeMessageHeaders(SocketInput input, FrameRequestHeaders requestHeaders)
{
var scan = input.ConsumingStart();
Expand Down Expand Up @@ -787,7 +791,7 @@ public static bool TakeMessageHeaders(SocketInput input, FrameRequestHeaders req
}

var name = beginName.GetArraySegment(endName);
var value = beginValue.GetString(endValue);
var value = beginValue.GetAsciiString(endValue);
if (wrapping)
{
value = value.Replace("\r\n", " ");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
using System;
using System.Linq;
using System.Numerics;
using System.Text;

namespace Microsoft.AspNet.Server.Kestrel.Infrastructure
{
Expand All @@ -22,8 +21,6 @@ public struct MemoryPoolIterator2
/// </summary>
private static Vector<byte> _dotIndex = new Vector<byte>(Enumerable.Range(0, Vector<byte>.Count).Select(x => (byte)-x).ToArray());

private static Encoding _utf8 = Encoding.UTF8;

private MemoryPoolBlock2 _block;
private int _index;

Expand Down Expand Up @@ -488,101 +485,6 @@ public int GetLength(MemoryPoolIterator2 end)
}
}

/// <summary>
/// Decodes the bytes between this iterator and <paramref name="end"/> as UTF-8.
/// </summary>
/// <param name="end">Iterator marking the end of the range to decode.</param>
/// <returns>
/// The decoded string, or <c>null</c> when either iterator is in its default state.
/// </returns>
public string GetString(MemoryPoolIterator2 end)
{
    if (IsDefault || end.IsDefault)
    {
        return default(string);
    }
    if (end._block == _block)
    {
        // Whole range lives in one block: decode it in a single call.
        return _utf8.GetString(_block.Array, _index, end._index - _index);
    }

    // A stateful decoder is used because the range is decoded one block at a time.
    var decoder = _utf8.GetDecoder();

    var remaining = GetLength(end);
    var capacity = remaining * 2;
    var chars = new char[capacity];
    var written = 0;

    var current = _block;
    var position = _index;
    while (true)
    {
        int bytesUsed;
        int charsUsed;
        bool completed;

        var available = current.End - position;

        // The final chunk is either the one that satisfies the requested length,
        // or the last block of the chain when it ends early.
        var isFinal = remaining <= available || current.Next == null;
        var count = remaining <= available ? remaining : available;

        decoder.Convert(
            current.Array,
            position,
            count,
            chars,
            written,
            capacity - written,
            isFinal,
            out bytesUsed,
            out charsUsed,
            out completed);
        written += charsUsed;

        if (isFinal)
        {
            return new string(chars, 0, written);
        }

        remaining -= available;
        current = current.Next;
        position = current.Start;
    }
}

/// <summary>
/// Returns the bytes between this iterator and <paramref name="end"/> as an array segment.
/// </summary>
/// <param name="end">Iterator marking the end of the range.</param>
/// <returns>
/// A segment over the block's own array when both iterators are in the same block;
/// otherwise a segment over a newly allocated copy. The default segment is returned
/// when either iterator is in its default state.
/// </returns>
public ArraySegment<byte> GetArraySegment(MemoryPoolIterator2 end)
{
    if (IsDefault || end.IsDefault)
    {
        return default(ArraySegment<byte>);
    }

    if (end._block != _block)
    {
        // Range spans blocks: gather the bytes into one contiguous array.
        var copied = GetLength(end);
        var buffer = new byte[copied];
        CopyTo(buffer, 0, copied, out copied);
        return new ArraySegment<byte>(buffer, 0, copied);
    }

    // Same block: no copy needed.
    return new ArraySegment<byte>(_block.Array, _index, end._index - _index);
}

public MemoryPoolIterator2 CopyTo(byte[] array, int offset, int count, out int actual)
{
if (IsDefault)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Copyright (c) .NET Foundation. All rights reserved.
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Text;

namespace Microsoft.AspNet.Server.Kestrel.Infrastructure
{
public static class MemoryPoolIterator2Extenstions
{
private const int _maxStackAllocBytes = 16384;

private static Encoding _utf8 = Encoding.UTF8;

/// <summary>
/// Builds a string from <paramref name="length"/> bytes of <paramref name="input"/>,
/// using a stack-allocated char buffer as scratch space.
/// </summary>
/// <param name="input">Array holding the bytes to convert.</param>
/// <param name="inputOffset">Index in <paramref name="input"/> of the first byte to convert.</param>
/// <param name="length">Number of bytes to convert; also the number of chars allocated on the stack.</param>
/// <returns>The string produced by the byte[] overload of GetAsciiStringImplementation.</returns>
/// <remarks>This method does not bound <paramref name="length"/> itself.</remarks>
private static unsafe string GetAsciiStringStack(byte[] input, int inputOffset, int length)
{
// avoid declaring other local vars, or doing work with stackalloc
// to prevent the .locals init cil flag, see: https://github.com/dotnet/coreclr/issues/1279
char* output = stackalloc char[length];

// The actual copy lives in a separate method so this frame holds only the stackalloc.
return GetAsciiStringImplementation(output, input, inputOffset, length);
}
/// <summary>
/// Widens <paramref name="length"/> bytes of <paramref name="input"/> into
/// <paramref name="output"/> and returns them as a string.
/// </summary>
/// <param name="output">Scratch buffer with room for at least <paramref name="length"/> chars.</param>
/// <param name="input">Array holding the bytes to convert.</param>
/// <param name="inputOffset">Index in <paramref name="input"/> of the first byte to convert.</param>
/// <param name="length">Number of bytes to convert.</param>
/// <returns>A string in which each char has the numeric value of the corresponding input byte.</returns>
private static unsafe string GetAsciiStringImplementation(char* output, byte[] input, int inputOffset, int length)
{
    var written = 0;
    while (written < length)
    {
        // Plain cast: every byte value, including those above 0x7F, is kept as-is.
        output[written] = (char)input[inputOffset + written];
        written++;
    }

    return new string(output, 0, length);
}

/// <summary>
/// Builds a string from <paramref name="length"/> bytes beginning in block
/// <paramref name="start"/>, using a stack-allocated char buffer as scratch space.
/// </summary>
/// <param name="start">Block containing the first byte to convert.</param>
/// <param name="end">Iterator marking the end of the range.</param>
/// <param name="inputOffset">Index within <paramref name="start"/>'s array of the first byte.</param>
/// <param name="length">Number of bytes to convert; also the number of chars allocated on the stack.</param>
/// <returns>The string produced by the block-walking overload of GetAsciiStringImplementation.</returns>
/// <remarks>This method does not bound <paramref name="length"/> itself.</remarks>
private static unsafe string GetAsciiStringStack(MemoryPoolBlock2 start, MemoryPoolIterator2 end, int inputOffset, int length)
{
// avoid declaring other local vars, or doing work with stackalloc
// to prevent the .locals init cil flag, see: https://github.com/dotnet/coreclr/issues/1279
char* output = stackalloc char[length];

// The actual copy lives in a separate method so this frame holds only the stackalloc.
return GetAsciiStringImplementation(output, start, end, inputOffset, length);
}

/// <summary>
/// Builds a string from <paramref name="length"/> bytes beginning in block
/// <paramref name="start"/>, using a heap-allocated char array as scratch space.
/// </summary>
/// <param name="start">Block containing the first byte to convert.</param>
/// <param name="end">Iterator marking the end of the range.</param>
/// <param name="inputOffset">Index within <paramref name="start"/>'s array of the first byte.</param>
/// <param name="length">Number of bytes to convert.</param>
/// <returns>The string produced by the block-walking overload of GetAsciiStringImplementation.</returns>
private unsafe static string GetAsciiStringHeap(MemoryPoolBlock2 start, MemoryPoolIterator2 end, int inputOffset, int length)
{
    var heapChars = new char[length];

    // Pin the array so the pointer-based implementation can write into it.
    fixed (char* destination = heapChars)
    {
        return GetAsciiStringImplementation(destination, start, end, inputOffset, length);
    }
}

private static unsafe string GetAsciiStringImplementation(char* output, MemoryPoolBlock2 start, MemoryPoolIterator2 end, int inputOffset, int length)
{
var outputOffset = 0;
var block = start;
var remaining = length;

var endBlock = end.Block;
var endIndex = end.Index;

while (true)
{
int following = (block != endBlock ? block.End : endIndex) - inputOffset;

if (following > 0)
{
var input = block.Array;
for (var i = 0; i < following; i++)
{
output[i + outputOffset] = (char)input[i + inputOffset];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The real question is how do you detect and report when this assumption is wrong? E.g. when the byte is > 0x7F (UTF-8). Otherwise the app just sees corrupt bytes in its fields.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's because it is corrupt data

https://tools.ietf.org/html/rfc7230#section-3.2.4

Historically, HTTP has allowed field content with text in the
ISO-8859-1 charset [ISO-8859-1], supporting other charsets only
through use of [RFC2047] encoding. In practice, most HTTP header
field values use only a subset of the US-ASCII charset [USASCII].
Newly defined header fields SHOULD limit their field values to
US-ASCII octets. A recipient SHOULD treat other octets in field
content (obs-text) as opaque data.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or rather "opaque octets" to go with the lingo...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but how do you report that it is corrupt data?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A recipient SHOULD treat other octets in field content (obs-text) as opaque data.

Says ignore it to me? Let it be read as the underlying byte value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, don't pass it into any interop C string processors, or they may read it truncated, as they will use the zero as a terminator; but that format of strings is bad for many, many reasons... and interpreting it truncated is not as bad as reading past the end.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should file an issue for this. AFAIK, ASCIIEncoding turns things into '?'. /cc @halter73

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Gone back to previous implementation of not using Encoding.ASCII but just casting the bytes to (char) which seems safer for the opaque data? As Encoding.ASCII does all kinds of stuff...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rechecking my assumptions about 255...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bytes 0 and 255 are ok in string; added tests for the 3 paths

}

remaining -= following;
outputOffset += following;
}

if (remaining == 0)
{
return new string(output, 0, length);
}

block = block.Next;
inputOffset = block.Start;
}
}

/// <summary>
/// Converts the bytes between <paramref name="start"/> and <paramref name="end"/> into a
/// string by widening each byte to a char.
/// </summary>
/// <param name="start">Iterator positioned at the first byte of the range.</param>
/// <param name="end">Iterator marking the end of the range.</param>
/// <returns>
/// The converted string, or <c>null</c> when either iterator is in its default state.
/// </returns>
public static string GetAsciiString(this MemoryPoolIterator2 start, MemoryPoolIterator2 end)
{
    if (start.IsDefault || end.IsDefault)
    {
        return default(string);
    }

    var length = start.GetLength(end);

    if (length == 0)
    {
        // Nothing to convert; skip the zero-sized stackalloc entirely.
        return string.Empty;
    }

    // Bytes out of the range of ascii are treated as "opaque data"
    // and kept in string as a char value that casts to same input byte value
    // https://tools.ietf.org/html/rfc7230#section-3.2.4
    if (length > _maxStackAllocBytes)
    {
        // Too large for a stack buffer, whether the range sits in one block or several.
        // The block-walking implementation stops at end.Index when start and end share
        // a block, so it covers the single-block case as well.
        // NOTE(review): relies on GetLength(end) equalling end.Index - start.Index for a
        // same-block range, which the same-block stack path already assumes - confirm.
        return GetAsciiStringHeap(start.Block, end, start.Index, length);
    }

    if (end.Block == start.Block)
    {
        return GetAsciiStringStack(start.Block.Array, start.Index, length);
    }

    return GetAsciiStringStack(start.Block, end, start.Index, length);
}

/// <summary>
/// Decodes the bytes between <paramref name="start"/> and <paramref name="end"/> as UTF-8.
/// </summary>
/// <param name="start">Iterator positioned at the first byte of the range.</param>
/// <param name="end">Iterator marking the end of the range.</param>
/// <returns>
/// The decoded string, or <c>null</c> when either iterator is in its default state.
/// </returns>
public static string GetUtf8String(this MemoryPoolIterator2 start, MemoryPoolIterator2 end)
{
    if (start.IsDefault || end.IsDefault)
    {
        return default(string);
    }
    if (end.Block == start.Block)
    {
        // Whole range lives in one block: decode it in a single call.
        return _utf8.GetString(start.Block.Array, start.Index, end.Index - start.Index);
    }

    // A stateful decoder is used because the range is decoded one block at a time.
    var decoder = _utf8.GetDecoder();

    var remaining = start.GetLength(end);
    var capacity = remaining * 2;
    var chars = new char[capacity];
    var written = 0;

    var current = start.Block;
    var position = start.Index;
    while (true)
    {
        int bytesUsed;
        int charsUsed;
        bool completed;

        var available = current.End - position;

        // The final chunk is either the one that satisfies the requested length,
        // or the last block of the chain when it ends early.
        var isFinal = remaining <= available || current.Next == null;
        var count = remaining <= available ? remaining : available;

        decoder.Convert(
            current.Array,
            position,
            count,
            chars,
            written,
            capacity - written,
            isFinal,
            out bytesUsed,
            out charsUsed,
            out completed);
        written += charsUsed;

        if (isFinal)
        {
            return new string(chars, 0, written);
        }

        remaining -= available;
        current = current.Next;
        position = current.Start;
    }
}

/// <summary>
/// Returns the bytes between <paramref name="start"/> and <paramref name="end"/> as an array segment.
/// </summary>
/// <param name="start">Iterator positioned at the first byte of the range.</param>
/// <param name="end">Iterator marking the end of the range.</param>
/// <returns>
/// A segment over the block's own array when both iterators are in the same block;
/// otherwise a segment over a newly allocated copy. The default segment is returned
/// when either iterator is in its default state.
/// </returns>
public static ArraySegment<byte> GetArraySegment(this MemoryPoolIterator2 start, MemoryPoolIterator2 end)
{
    if (start.IsDefault || end.IsDefault)
    {
        return default(ArraySegment<byte>);
    }

    if (end.Block != start.Block)
    {
        // Range spans blocks: gather the bytes into one contiguous array.
        var copied = start.GetLength(end);
        var buffer = new byte[copied];
        start.CopyTo(buffer, 0, copied, out copied);
        return new ArraySegment<byte>(buffer, 0, copied);
    }

    // Same block: no copy needed.
    return new ArraySegment<byte>(start.Block.Array, start.Index, end.Index - start.Index);
}
}
}
Loading