Skip to content

Commit 2bfe21b

Browse files
Support writing base64 JSON segments (#111041)
1 parent 7132268 commit 2bfe21b

File tree

7 files changed

+912
-312
lines changed

7 files changed

+912
-312
lines changed

src/libraries/Fuzzing/DotnetFuzzing/Fuzzers/Utf8JsonWriterFuzzer.cs

Lines changed: 88 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
using System;
55
using System.Buffers;
6+
using System.Buffers.Text;
67
using System.Collections;
78
using System.Collections.Generic;
89
using System.Diagnostics;
@@ -33,8 +34,11 @@ internal sealed class Utf8JsonWriterFuzzer : IFuzzer
3334
private const byte NewLineFlag = 1 << 3;
3435
private const byte SkipValidationFlag = 1 << 4;
3536

36-
// Options for choosing between UTF-8 and UTF-16 encoding
37-
private const byte EncodingFlag = 1 << 5;
37+
// Options for choosing between base64, UTF-8 and UTF-16 encoding
38+
private const byte EncodingMask = 0b11 << 5;
39+
private const byte Utf8EncodingFlag = 0b00 << 5;
40+
private const byte Utf16EncodingFlag = 0b01 << 5;
41+
private const byte Base64EncodingFlag = 0b10 << 5;
3842

3943
public void FuzzTarget(ReadOnlySpan<byte> bytes)
4044
{
@@ -53,8 +57,13 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
5357
ReadOnlySpan<char> chars = MemoryMarshal.Cast<byte, char>(bytes);
5458

5559
// Validate that the indices are within bounds of the input
56-
bool utf8 = (optionsByte & EncodingFlag) == 0;
57-
if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (utf8 ? bytes.Length : chars.Length)))
60+
int encoding = optionsByte & EncodingMask;
61+
if (encoding is not Utf8EncodingFlag and not Utf16EncodingFlag and not Base64EncodingFlag)
62+
{
63+
return;
64+
}
65+
66+
if (!(0 <= slice1 && slice1 <= slice2 && slice2 <= (encoding is Utf16EncodingFlag ? chars.Length : bytes.Length)))
5867
{
5968
return;
6069
}
@@ -63,7 +72,7 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
6372
bool indented = (optionsByte & IndentFlag) == 0;
6473
JsonWriterOptions options = new()
6574
{
66-
Encoder = (optionsByte & EncodingFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
75+
Encoder = (optionsByte & EncoderFlag) == 0 ? JavaScriptEncoder.Default : JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
6776
Indented = indented,
6877
MaxDepth = (optionsByte & MaxDepthFlag) == 0 ? 1 : 0,
6978
NewLine = (optionsByte & NewLineFlag) == 0 ? "\n" : "\r\n",
@@ -74,9 +83,9 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
7483
int maxExpandedSizeBytes = 6 * bytes.Length + 2;
7584
byte[] expectedBuffer = ArrayPool<byte>.Shared.Rent(maxExpandedSizeBytes);
7685
Span<byte> expected =
77-
expectedBuffer.AsSpan(0, utf8
78-
? EncodeToUtf8(bytes, expectedBuffer, options.Encoder)
79-
: EncodeToUtf8(chars, expectedBuffer, options.Encoder));
86+
expectedBuffer.AsSpan(0, encoding == Utf16EncodingFlag
87+
? EncodeToUtf8(chars, expectedBuffer, options.Encoder)
88+
: EncodeToUtf8(bytes, expectedBuffer, options.Encoder, encoding == Base64EncodingFlag));
8089

8190
// Compute the actual result by using Utf8JsonWriter. Each iteration is a different slice of the input, but the result should be the same.
8291
byte[] actualBuffer = new byte[expected.Length];
@@ -89,14 +98,14 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
8998
{
9099
using MemoryStream stream = new(actualBuffer);
91100
using Utf8JsonWriter writer = new(stream, options);
92-
93-
if (utf8)
101+
102+
if (encoding == Utf16EncodingFlag)
94103
{
95-
WriteStringValueSegments(writer, bytes, ranges);
104+
WriteStringValueSegments(writer, chars, ranges);
96105
}
97106
else
98107
{
99-
WriteStringValueSegments(writer, chars, ranges);
108+
WriteStringValueSegments(writer, bytes, ranges, encoding == Base64EncodingFlag);
100109
}
101110

102111
writer.Flush();
@@ -110,7 +119,7 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
110119
}
111120

112121
// Additional tests for mixing UTF-8, UTF-16 and base64 segments within a single string value; each mix must throw. The alignment math is easier in UTF-16 mode so just run them for that.
113-
if (!utf8)
122+
if (encoding == Utf16EncodingFlag)
114123
{
115124
Array.Clear(expectedBuffer);
116125

@@ -124,9 +133,16 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
124133
using MemoryStream stream = new(actualBuffer);
125134
using Utf8JsonWriter writer = new(stream, options);
126135

136+
// UTF-16 + UTF-8
127137
writer.WriteStringValueSegment(firstSegment, false);
128-
129138
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteStringValueSegment(state, true), secondSegment);
139+
140+
stream.Position = 0;
141+
writer.Reset();
142+
143+
// UTF-16 + Base64
144+
writer.WriteStringValueSegment(firstSegment, false);
145+
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteBase64StringSegment(state, true), secondSegment);
130146
}
131147

132148
Array.Clear(expectedBuffer);
@@ -135,25 +151,67 @@ public void FuzzTarget(ReadOnlySpan<byte> bytes)
135151
ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)];
136152
ReadOnlySpan<char> secondSegment = chars[slice1..];
137153

138-
expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, secondSegment, expectedBuffer, options.Encoder));
154+
expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder, base64Encode: false));
139155

140156
actualBuffer = new byte[expected.Length];
141157
using MemoryStream stream = new(actualBuffer);
142158
using Utf8JsonWriter writer = new(stream, options);
143159

160+
// UTF-8 + UTF-16
144161
writer.WriteStringValueSegment(firstSegment, false);
145162
Assert.Throws<InvalidOperationException, ReadOnlySpan<char>>(state => writer.WriteStringValueSegment(state, true), secondSegment);
163+
164+
stream.Position = 0;
165+
writer.Reset();
166+
167+
// UTF-8 + Base64
168+
writer.WriteStringValueSegment(firstSegment, false);
169+
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteBase64StringSegment(state, true), MemoryMarshal.AsBytes(secondSegment));
170+
}
171+
172+
Array.Clear(expectedBuffer);
173+
174+
{
175+
ReadOnlySpan<byte> firstSegment = bytes[0..(2 * slice1)];
176+
ReadOnlySpan<char> secondSegment = chars[slice1..];
177+
178+
expected = expectedBuffer.AsSpan(0, EncodeToUtf8(firstSegment, expectedBuffer, options.Encoder, base64Encode: true));
179+
180+
actualBuffer = new byte[expected.Length];
181+
using MemoryStream stream = new(actualBuffer);
182+
using Utf8JsonWriter writer = new(stream, options);
183+
184+
// Base64 + UTF-16
185+
writer.WriteBase64StringSegment(firstSegment, false);
186+
Assert.Throws<InvalidOperationException, ReadOnlySpan<char>>(state => writer.WriteStringValueSegment(state, true), secondSegment);
187+
188+
stream.Position = 0;
189+
writer.Reset();
190+
191+
// Base64 + UTF-8
192+
writer.WriteBase64StringSegment(firstSegment, false);
193+
Assert.Throws<InvalidOperationException, ReadOnlySpan<byte>>(state => writer.WriteStringValueSegment(state, true), MemoryMarshal.AsBytes(secondSegment));
146194
}
147195
}
148196

149197
ArrayPool<byte>.Shared.Return(expectedBuffer);
150198
}
151199

152-
private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges)
200+
private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan<byte> bytes, ReadOnlySpan<Range> ranges, bool base64Encode)
153201
{
154-
for (int i = 0; i < ranges.Length; i++)
202+
if (base64Encode)
203+
{
204+
for (int i = 0; i < ranges.Length; i++)
205+
{
206+
writer.WriteBase64StringSegment(bytes[ranges[i]], i == ranges.Length - 1);
207+
}
208+
}
209+
else
155210
{
156-
writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1);
211+
for (int i = 0; i < ranges.Length; i++)
212+
{
213+
writer.WriteStringValueSegment(bytes[ranges[i]], i == ranges.Length - 1);
214+
}
157215
}
158216
}
159217

@@ -165,10 +223,20 @@ private static void WriteStringValueSegments(Utf8JsonWriter writer, ReadOnlySpan
165223
}
166224
}
167225

168-
private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
226+
private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder, bool base64Encode)
169227
{
170228
destBuffer[0] = (byte)'"';
171-
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int written, isFinalBlock: true);
229+
230+
int written;
231+
if (base64Encode)
232+
{
233+
Base64.EncodeToUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true);
234+
}
235+
else
236+
{
237+
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out written, isFinalBlock: true);
238+
}
239+
172240
destBuffer[++written] = (byte)'"';
173241
return written + 1;
174242
}
@@ -181,27 +249,6 @@ private static int EncodeToUtf8(ReadOnlySpan<char> chars, Span<byte> destBuffer,
181249
return written + 1;
182250
}
183251

184-
private static int EncodeToUtf8(ReadOnlySpan<byte> bytes, ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder)
185-
{
186-
int written = 1;
187-
destBuffer[0] = (byte)'"';
188-
encoder.EncodeUtf8(bytes, destBuffer[1..], out _, out int writtenTemp, isFinalBlock: true);
189-
written += writtenTemp;
190-
destBuffer[written += EncodeTranscode(chars, destBuffer[written..], encoder, isFinalBlock: true)] = (byte)'"';
191-
return written + 1;
192-
}
193-
194-
private static int EncodeToUtf8(ReadOnlySpan<char> chars, ReadOnlySpan<byte> bytes, Span<byte> destBuffer, JavaScriptEncoder encoder)
195-
{
196-
int written = 1;
197-
destBuffer[0] = (byte)'"';
198-
written += EncodeTranscode(chars, destBuffer[1..], encoder, isFinalBlock: true);
199-
encoder.EncodeUtf8(bytes, destBuffer[written..], out _, out int writtenTemp, isFinalBlock: true);
200-
written += writtenTemp;
201-
destBuffer[written] = (byte)'"';
202-
return written + 1;
203-
}
204-
205252
private static int EncodeTranscode(ReadOnlySpan<char> chars, Span<byte> destBuffer, JavaScriptEncoder encoder, bool isFinalBlock = true)
206253
{
207254
var utf16buffer = ArrayPool<char>.Shared.Rent(6 * chars.Length);

src/libraries/System.Text.Json/ref/System.Text.Json.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,7 @@ public void WriteStringValue(string? value) { }
681681
public void WriteStringValue(System.Text.Json.JsonEncodedText value) { }
682682
public void WriteStringValueSegment(System.ReadOnlySpan<byte> value, bool isFinalSegment) { }
683683
public void WriteStringValueSegment(System.ReadOnlySpan<char> value, bool isFinalSegment) { }
684+
public void WriteBase64StringSegment(System.ReadOnlySpan<byte> value, bool isFinalSegment) { }
684685
}
685686
}
686687
namespace System.Text.Json.Nodes

src/libraries/System.Text.Json/src/System/Text/Json/Writer/Utf8JsonWriter.WriteValues.Helpers.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ namespace System.Text.Json
1212
{
1313
public sealed partial class Utf8JsonWriter
1414
{
15-
private bool HasPartialCodePoint => PartialCodePointLength != 0;
15+
private bool HasPartialStringData => PartialStringDataLength != 0;
1616

17-
private void ClearPartialCodePoint() => PartialCodePointLength = 0;
17+
private void ClearPartialStringData() => PartialStringDataLength = 0;
1818

1919
private void ValidateEncodingDidNotChange(SegmentEncoding currentSegmentEncoding)
2020
{
@@ -32,7 +32,7 @@ private void ValidateNotWithinUnfinalizedString()
3232
}
3333

3434
Debug.Assert(PreviousSegmentEncoding == SegmentEncoding.None);
35-
Debug.Assert(!HasPartialCodePoint);
35+
Debug.Assert(!HasPartialStringData);
3636
}
3737

3838
private void ValidateWritingValue()

0 commit comments

Comments
 (0)