Skip to content

Commit d2128b2

Browse files
authored
Merge pull request #998 from nahk-ivanov/alexiva/fix-json-utf32
Fix JSON serialization for UTF-32 characters. +semver:fix
2 parents 1055eb7 + 7333635 commit d2128b2

File tree

5 files changed

+66
-11
lines changed

5 files changed

+66
-11
lines changed

YamlDotNet.Test/Serialization/SerializationTests.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -886,6 +886,15 @@ public void SerializationOfAnchorWorksInJson()
886886
.BeEquivalentTo(@"{""x"": {""z"": {""v"": ""1""}}, ""y"": {""k"": {""z"": {""v"": ""1""}}}}");
887887
}
888888

889+
[Fact]
890+
public void SerializationOfUtf32WorksInJson()
891+
{
892+
var obj = new { TestProperty = "Sea life \U0001F99E" };
893+
894+
SerializerBuilder.JsonCompatible().Build().Serialize(obj).Trim().Should()
895+
.Be(@"{""TestProperty"": ""Sea life \uD83E\uDD9E""}");
896+
}
897+
889898
[Fact]
890899
// Todo: this is actually roundtrip
891900
public void DeserializationOfDefaultsWorkInJson()

YamlDotNet/Core/Emitter.cs

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ public class Emitter : IEmitter
6666
private bool isWhitespace;
6767
private bool isIndentation;
6868
private readonly bool forceIndentLess;
69+
private readonly bool useUtf16SurrogatePair;
6970
private readonly string newLine;
7071

7172
private bool isDocumentEndWritten;
@@ -148,6 +149,7 @@ public Emitter(TextWriter output, EmitterSettings settings)
148149
this.maxSimpleKeyLength = settings.MaxSimpleKeyLength;
149150
this.skipAnchorName = settings.SkipAnchorName;
150151
this.forceIndentLess = !settings.IndentSequences;
152+
this.useUtf16SurrogatePair = settings.UseUtf16SurrogatePairs;
151153
this.newLine = settings.NewLine;
152154

153155
this.output = output;
@@ -1189,8 +1191,20 @@ private void WriteDoubleQuotedScalar(string value, bool allowBreaks)
11891191
{
11901192
if (index + 1 < value.Length && IsLowSurrogate(value[index + 1]))
11911193
{
1192-
Write('U');
1193-
Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture));
1194+
if (useUtf16SurrogatePair)
1195+
{
1196+
Write('u');
1197+
Write(code.ToString("X04", CultureInfo.InvariantCulture));
1198+
Write('\\');
1199+
Write('u');
1200+
Write(((ushort)value[index + 1]).ToString("X04", CultureInfo.InvariantCulture));
1201+
}
1202+
else
1203+
{
1204+
Write('U');
1205+
Write(char.ConvertToUtf32(character, value[index + 1]).ToString("X08", CultureInfo.InvariantCulture));
1206+
}
1207+
11941208
index++;
11951209
}
11961210
else

YamlDotNet/Core/EmitterSettings.cs

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,22 @@ public sealed class EmitterSettings
6363
/// </summary>
6464
public bool IndentSequences { get; }
6565

66+
/// <summary>
67+
/// If true, then characters that need 4 bytes in UTF-32 (outside the Basic Multilingual Plane) are escaped as a UTF-16 surrogate pair, i.e. two 2-byte code units.
68+
/// </summary>
69+
/// <remarks>
70+
/// This ensures compatibility with the JSON format, as it does not allow '\Uxxxxxxxx'
71+
/// and instead expects two escaped 2-byte characters '\uxxxx\uxxxx'.
72+
/// </remarks>
73+
public bool UseUtf16SurrogatePairs { get; }
74+
6675
public static readonly EmitterSettings Default = new EmitterSettings();
6776

6877
public EmitterSettings()
6978
{
7079
}
7180

72-
public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxSimpleKeyLength, bool skipAnchorName = false, bool indentSequences = false, string? newLine = null)
81+
public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxSimpleKeyLength, bool skipAnchorName = false, bool indentSequences = false, string? newLine = null, bool useUtf16SurrogatePairs = false)
7382
{
7483
if (bestIndent < 2 || bestIndent > 9)
7584
{
@@ -93,6 +102,7 @@ public EmitterSettings(int bestIndent, int bestWidth, bool isCanonical, int maxS
93102
SkipAnchorName = skipAnchorName;
94103
IndentSequences = indentSequences;
95104
NewLine = newLine ?? Environment.NewLine;
105+
UseUtf16SurrogatePairs = useUtf16SurrogatePairs;
96106
}
97107

98108
public EmitterSettings WithBestIndent(int bestIndent)
@@ -104,7 +114,8 @@ public EmitterSettings WithBestIndent(int bestIndent)
104114
MaxSimpleKeyLength,
105115
SkipAnchorName,
106116
IndentSequences,
107-
NewLine
117+
NewLine,
118+
UseUtf16SurrogatePairs
108119
);
109120
}
110121

@@ -117,7 +128,8 @@ public EmitterSettings WithBestWidth(int bestWidth)
117128
MaxSimpleKeyLength,
118129
SkipAnchorName,
119130
IndentSequences,
120-
NewLine
131+
NewLine,
132+
UseUtf16SurrogatePairs
121133
);
122134
}
123135

@@ -130,7 +142,8 @@ public EmitterSettings WithMaxSimpleKeyLength(int maxSimpleKeyLength)
130142
maxSimpleKeyLength,
131143
SkipAnchorName,
132144
IndentSequences,
133-
NewLine
145+
NewLine,
146+
UseUtf16SurrogatePairs
134147
);
135148
}
136149

@@ -143,7 +156,8 @@ public EmitterSettings WithNewLine(string newLine)
143156
MaxSimpleKeyLength,
144157
SkipAnchorName,
145158
IndentSequences,
146-
newLine
159+
newLine,
160+
UseUtf16SurrogatePairs
147161
);
148162
}
149163

@@ -167,7 +181,8 @@ public EmitterSettings WithoutAnchorName()
167181
MaxSimpleKeyLength,
168182
true,
169183
IndentSequences,
170-
NewLine
184+
NewLine,
185+
UseUtf16SurrogatePairs
171186
);
172187
}
173188

@@ -180,7 +195,22 @@ public EmitterSettings WithIndentedSequences()
180195
MaxSimpleKeyLength,
181196
SkipAnchorName,
182197
true,
183-
NewLine
198+
NewLine,
199+
UseUtf16SurrogatePairs
200+
);
201+
}
202+
203+
public EmitterSettings WithUtf16SurrogatePairs()
204+
{
205+
return new EmitterSettings(
206+
BestIndent,
207+
BestWidth,
208+
IsCanonical,
209+
MaxSimpleKeyLength,
210+
SkipAnchorName,
211+
IndentSequences,
212+
NewLine,
213+
true
184214
);
185215
}
186216
}

YamlDotNet/Serialization/SerializerBuilder.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@ public SerializerBuilder JsonCompatible()
366366
{
367367
this.emitterSettings = this.emitterSettings
368368
.WithMaxSimpleKeyLength(int.MaxValue)
369-
.WithoutAnchorName();
369+
.WithoutAnchorName()
370+
.WithUtf16SurrogatePairs();
370371

371372
return this
372373
.WithTypeConverter(new GuidConverter(true), w => w.InsteadOf<GuidConverter>())

YamlDotNet/Serialization/StaticSerializerBuilder.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,8 @@ public StaticSerializerBuilder JsonCompatible()
370370
{
371371
this.emitterSettings = this.emitterSettings
372372
.WithMaxSimpleKeyLength(int.MaxValue)
373-
.WithoutAnchorName();
373+
.WithoutAnchorName()
374+
.WithUtf16SurrogatePairs();
374375

375376
return this
376377
.WithTypeConverter(new GuidConverter(true), w => w.InsteadOf<GuidConverter>())

0 commit comments

Comments
 (0)