Skip to content

Commit 4a5dbcc

Browse files
committed
#13 - Utf8 validation.
1 parent ade78b1 commit 4a5dbcc

File tree

3 files changed

+143
-0
lines changed

3 files changed

+143
-0
lines changed

src/Microsoft.AspNet.WebSockets.Protocol/CommonWebSocket.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ public class CommonWebSocket : WebSocket
4040
private long _frameBytesRemaining;
4141
private int? _firstDataOpCode;
4242
private int _dataUnmaskOffset;
43+
private Utilities.Utf8MessageState _incomingUtf8MessageState = new Utilities.Utf8MessageState();
4344

4445
public CommonWebSocket(Stream stream, string subProtocol, TimeSpan keepAliveInterval, int receiveBufferSize, bool maskOutput, bool useZeroMask, bool unmaskInput)
4546
{
@@ -251,6 +252,14 @@ public async override Task<WebSocketReceiveResult> ReceiveAsync(ArraySegment<byt
251252

252253
WebSocketReceiveResult result;
253254
WebSocketMessageType messageType = Utilities.GetMessageType(opCode);
255+
256+
if (messageType == WebSocketMessageType.Text
257+
&& !Utilities.TryValidateUtf8(new ArraySegment<byte>(buffer.Array, buffer.Offset, bytesToCopy), _frameInProgress.Fin, _incomingUtf8MessageState))
258+
{
259+
await CloseOutputAsync(WebSocketCloseStatus.InvalidPayloadData, string.Empty, cancellationToken);
260+
throw new InvalidOperationException("An invalid UTF-8 payload was received.");
261+
}
262+
254263
if (bytesToCopy == _frameBytesRemaining)
255264
{
256265
result = new WebSocketReceiveResult(bytesToCopy, messageType, _frameInProgress.Fin);

src/Microsoft.AspNet.WebSockets.Protocol/Utilities.cs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,77 @@ public static WebSocketMessageType GetMessageType(int opCode)
6969
default: throw new NotImplementedException(opCode.ToString());
7070
}
7171
}
72+
73+
// Incrementally validates UTF-8 text. Decoder progress is carried in 'state', so a multi-byte
// sequence may be split across consecutive calls (e.g. across frames or receive buffers of one message).
//
// Rejected as malformed, following RFC 3629:
//   - a continuation byte (10bbbbbb) where a lead byte is expected, or a non-continuation byte inside a sequence
//   - lead bytes 0xF8-0xFF (the obsolete 5 and 6 byte forms, and 0xFE / 0xFF)
//   - overlong encodings (a code point encoded with more bytes than necessary, e.g. C0 AF)
//   - UTF-16 surrogates U+D800-U+DFFF
//   - values above U+10FFFF
//   - a sequence that is still incomplete when endOfMessage is true
//
// arraySegment: the bytes to check; only [Offset, Offset + Count) is read.
// endOfMessage: true when this is the last segment of the message.
// state:        per-message decoder state. Use the same instance for every segment of one message.
//               After this method returns false the state is left mid-sequence and must not be reused.
// Returns:      true if no malformed sequence has been seen so far; otherwise false.
//
// http://etutorials.org/Programming/secure+programming/Chapter+3.+Input+Validation/3.12+Detecting+Illegal+UTF-8+Characters/
public static bool TryValidateUtf8(ArraySegment<byte> arraySegment, bool endOfMessage, Utf8MessageState state)
{
    byte[] bytes = arraySegment.Array;
    int end = arraySegment.Offset + arraySegment.Count;

    for (int i = arraySegment.Offset; i < end; )
    {
        // Not inside a sequence: the next byte must be a lead byte, which fixes the sequence length.
        if (!state.SequenceInProgress)
        {
            state.SequenceInProgress = true;
            byte lead = bytes[i];
            i++;

            if ((lead & 0x80) == 0) // 0bbbbbbb, single byte
            {
                state.AdditionalBytesExpected = 0;
                state.CurrentDecodeBits = lead;
                state.ExpectedValueMin = 0;
            }
            else if ((lead & 0xC0) == 0x80)
            {
                return false; // Misplaced 10bbbbbb byte. This cannot be the first byte.
            }
            else if ((lead & 0xE0) == 0xC0) // 110bbbbb 10bbbbbb
            {
                state.AdditionalBytesExpected = 1;
                state.CurrentDecodeBits = lead & 0x1F;
                state.ExpectedValueMin = 0x80;
            }
            else if ((lead & 0xF0) == 0xE0) // 1110bbbb 10bbbbbb 10bbbbbb
            {
                state.AdditionalBytesExpected = 2;
                state.CurrentDecodeBits = lead & 0x0F;
                state.ExpectedValueMin = 0x800;
            }
            else if ((lead & 0xF8) == 0xF0) // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
            {
                state.AdditionalBytesExpected = 3;
                state.CurrentDecodeBits = lead & 0x07;
                state.ExpectedValueMin = 0x10000;
            }
            else // 111110bb, 1111110b, 11111110 and 11111111 are not valid UTF-8 lead bytes (RFC 3629).
            {
                return false;
            }
        }

        // Consume as many continuation bytes as this segment provides.
        while (state.AdditionalBytesExpected > 0 && i < end)
        {
            byte continuation = bytes[i];
            if ((continuation & 0xC0) != 0x80)
            {
                return false;
            }

            i++;
            state.AdditionalBytesExpected--;

            // Every continuation byte (10bbbbbb) contributes six payload bits.
            state.CurrentDecodeBits = (state.CurrentDecodeBits << 6) | (continuation & 0x3F);

            // Fail as early as possible instead of waiting for the sequence to complete.
            // With one byte still to come, 0x360-0x37F are the top bits of U+D800-U+DFFF.
            // (A 4-byte sequence can only hit this range when it is overlong, which is invalid anyway.)
            if (state.AdditionalBytesExpected == 1 && state.CurrentDecodeBits >= 0x360 && state.CurrentDecodeBits <= 0x37F)
            {
                return false;
            }

            // With two bytes still to come (only possible in a 4-byte sequence), 0x110 and above
            // are the top bits of values greater than U+10FFFF.
            if (state.AdditionalBytesExpected == 2 && state.CurrentDecodeBits >= 0x110)
            {
                return false;
            }
        }

        if (state.AdditionalBytesExpected == 0)
        {
            state.SequenceInProgress = false;

            // Overlong encoding: the value would have fit in a shorter sequence.
            if (state.CurrentDecodeBits < state.ExpectedValueMin)
            {
                return false;
            }
        }
    }

    // A message must not end in the middle of a multi-byte sequence.
    if (endOfMessage && state.SequenceInProgress)
    {
        return false;
    }
    return true;
}

// Decoder state carried between TryValidateUtf8 calls for the segments of a single message.
public class Utf8MessageState
{
    // True while a multi-byte sequence has been started but not yet completed.
    public bool SequenceInProgress { get; set; }

    // Number of continuation bytes still required to complete the current sequence.
    public int AdditionalBytesExpected { get; set; }

    // Smallest code point that legitimately needs the current sequence length (overlong detection).
    public int ExpectedValueMin { get; set; }

    // Payload bits of the current sequence accumulated so far.
    public int CurrentDecodeBits { get; set; }
}
72144
}
73145
}
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright (c) Microsoft Open Technologies, Inc. All rights reserved.
2+
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.
3+
4+
using System;
5+
using System.Text;
6+
using Xunit;
7+
8+
namespace Microsoft.AspNet.WebSockets.Protocol.Test
{
    // Tests for Utilities.TryValidateUtf8, covering both whole messages and messages
    // delivered as several segments that share one Utf8MessageState.
    public class Utf8ValidationTests
    {
        // Feeds a single segment to the validator using the given per-message state.
        private static bool Check(byte[] payload, bool isLastSegment, Utilities.Utf8MessageState tracker)
        {
            var segment = new ArraySegment<byte>(payload);
            return Utilities.TryValidateUtf8(segment, endOfMessage: isLastSegment, state: tracker);
        }

        // A complete, well-formed message in one segment must be accepted.
        [Theory]
        [InlineData(new byte[] { })]
        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x57, 0x6F, 0x72, 0x6C, 0x64 })] // Hello World
        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2D, 0xC2, 0xB5, 0x40, 0xC3, 0x9F, 0xC3, 0xB6, 0xC3, 0xA4, 0xC3, 0xBC, 0xC3, 0xA0, 0xC3, 0xA1 })] // "Hello-µ@ßöäüàá";
        public void ValidateSingleValidSegments_Valid(byte[] data)
        {
            var tracker = new Utilities.Utf8MessageState();

            bool accepted = Check(data, true, tracker);

            Assert.True(accepted);
        }

        // A well-formed message split into three segments (including a split in the middle of a
        // two-byte sequence) must be accepted segment by segment.
        [Theory]
        [InlineData(new byte[] { }, new byte[] { }, new byte[] { })]
        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20 }, new byte[] { }, new byte[] { 0x57, 0x6F, 0x72, 0x6C, 0x64 })] // Hello ,, World
        [InlineData(new byte[] { 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x2D, 0xC2, }, new byte[] { 0xB5, 0x40, 0xC3, 0x9F, 0xC3, 0xB6, 0xC3, 0xA4, }, new byte[] { 0xC3, 0xBC, 0xC3, 0xA0, 0xC3, 0xA1 })] // "Hello-µ@ßöäüàá";
        public void ValidateMultipleValidSegments_Valid(byte[] data1, byte[] data2, byte[] data3)
        {
            var tracker = new Utilities.Utf8MessageState();

            // Each segment is asserted before the next one is fed, so a failure pinpoints the segment.
            bool first = Check(data1, false, tracker);
            Assert.True(first);

            bool second = Check(data2, false, tracker);
            Assert.True(second);

            bool third = Check(data3, true, tracker);
            Assert.True(third);
        }

        // A malformed message in one segment must be rejected.
        // The commented-out cases (overlong encodings, 5/6 byte forms, values above U+10FFFF and
        // surrogates) are disabled in this commit; re-enable them once TryValidateUtf8 rejects them.
        [Theory]
        [InlineData(new byte[] { 0xfe })]
        [InlineData(new byte[] { 0xff })]
        [InlineData(new byte[] { 0xfe, 0xfe, 0xff, 0xff })]
        // [InlineData(new byte[] { 0xc0, 0xaf })]
        // [InlineData(new byte[] { 0xe0, 0x80, 0xaf })]
        // [InlineData(new byte[] { 0xf4, 0x90, 0x80, 0x80 })]
        // [InlineData(new byte[] { 0xf0, 0x80, 0x80, 0xaf })]
        // [InlineData(new byte[] { 0xf8, 0x80, 0x80, 0x80, 0xaf })]
        // [InlineData(new byte[] { 0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf })]
        // [InlineData(new byte[] { 0xc1, 0xbf })]
        // [InlineData(new byte[] { 0xed, 0xa0, 0x80, 0x65, 0x64, 0x69, 0x74, 0x65, 0x64 })] // 0xEDA080 decodes to 0xD800, which is a reserved high surrogate character.
        public void ValidateSingleInvalidSegment_Invalid(byte[] data)
        {
            var tracker = new Utilities.Utf8MessageState();

            bool accepted = Check(data, true, tracker);

            Assert.False(accepted);
        }

        // Disabled in this commit: multi-segment rejection, where each segment carries its own expected result.
        /*
        [Theory]
        // [InlineData(true, new byte[] { 0xce, 0xba, 0xe1, 0xbd, 0xb9, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5, 0xf4 }, false, new byte[] { 0x90 }, true, new byte[] { })]
        public void ValidateMultipleInvalidSegments_Invalid(bool valid1, byte[] data1, bool valid2, byte[] data2, bool valid3, byte[] data3)
        {
            var state = new Utilities.Utf8MessageState();
            Assert.True(valid1 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data1), endOfMessage: false, state: state), "1st");
            Assert.True(valid2 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data2), endOfMessage: false, state: state), "2nd");
            Assert.True(valid3 == Utilities.TryValidateUtf8(new ArraySegment<byte>(data3), endOfMessage: true, state: state), "3rd");
        }*/
    }
}

0 commit comments

Comments
 (0)