Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f43d988
Recognize supplementary characters
tats-u Nov 3, 2025
2a851a2
Internatize Rune
tats-u Dec 28, 2025
de5d3fe
Fix failing tests
tats-u Dec 31, 2025
095f0ed
Fix extra comment error
tats-u Dec 31, 2025
948bf66
Remove extra local variable c
tats-u Jan 2, 2026
e968e52
Reorganize classes around Rune
tats-u Jan 2, 2026
bbffa33
Prepare both Rune and char variants / make Rune variant public for .NET
tats-u Jan 2, 2026
3a65e4b
Make APIs in StringSlice.cs public only in modern .NET
tats-u Jan 3, 2026
0f928a2
Throw exception if cannot obtain first Rune
tats-u Jan 3, 2026
a4c9146
Add comments
tats-u Jan 3, 2026
9839b99
Add comment on PeekRuneExtra
tats-u Jan 4, 2026
3ba8a3c
Use `Rune.TryCreate`
tats-u Jan 4, 2026
8ab6542
Remove backtrack
tats-u Jan 4, 2026
f6d6916
Fix parameter name in XML comment
tats-u Jan 4, 2026
03822ac
Don't throw when error in `Rune.DecodeFromUtf16`
tats-u Jan 4, 2026
b9d9e09
Fix RuneAt
tats-u Jan 4, 2026
476fb63
Add tests of Rune-related methods of `StringSlice`
tats-u Jan 4, 2026
4cb6895
Make comment more tolerant of changes
tats-u Jan 4, 2026
e1e58cb
Tweak comment
tats-u Jan 4, 2026
b302cbc
Fix comment
tats-u Jan 5, 2026
a0d08bf
Add `readonly`
tats-u Jan 5, 2026
31f48ac
Move namespace of polyfilled Rune out of System.Text
tats-u Jan 5, 2026
dbcbcf9
Apply suggestions from code review
tats-u Jan 8, 2026
7d4a678
Fix regression by review suggestion
tats-u Jan 8, 2026
b23a002
Merge remote-tracking branch 'origin/master' into rune
tats-u Jan 8, 2026
c048018
Prepare constant for .NET Standard test
tats-u Jan 8, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/Markdig.Tests/TestEmphasisPlus.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,19 @@ public void NormalStrongNormal()
TestParser.TestSpec("normal ***Strong emphasis*** normal", "<p>normal <em><strong>Strong emphasis</strong></em> normal</p>", "");
}

[Test]
public void SupplementaryPunctuation()
{
    // Six paragraphs, each placing '*' directly next to a BMP or supplementary symbol.
    // The expected HTML keeps every '*' literal, i.e. no emphasis is produced.
    const string markdown = "a*a∇*a\n\na*∇a*a\n\na*a𝜵*a\n\na*𝜵a*a\n\na*𐬼a*a\n\na*a𐬼*a";
    const string expectedHtml = "<p>a*a∇*a</p>\n<p>a*∇a*a</p>\n<p>a*a𝜵*a</p>\n<p>a*𝜵a*a</p>\n<p>a*𐬼a*a</p>\n<p>a*a𐬼*a</p>";

    TestParser.TestSpec(markdown, expectedHtml, "");
}

[Test]
public void RecognizeSupplementaryChars()
{
    // Strong emphasis whose delimiters are surrounded by emoji and supplementary
    // ideographs; the expected HTML wraps each delimited ideograph in <strong>.
    const string markdown = "🌶️**𰻞**🍜**𰻞**🌶️**麺**🍜";
    const string expectedHtml = "<p>🌶️<strong>𰻞</strong>🍜<strong>𰻞</strong>🌶️<strong>麺</strong>🍜</p>";

    TestParser.TestSpec(markdown, expectedHtml, "");
}


[Test]
public void OpenEmphasisHasConvenientContentStringSlice()
{
Expand Down
10 changes: 10 additions & 0 deletions src/Markdig.Tests/TestSmartyPants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,14 @@ public void MappingCanBeReconfigured_HandlesRemovedMappings()

TestParser.TestSpec("<<test>>", "<p>&laquo;test&raquo;</p>", pipeline);
}

[Test]
public void RecognizesSupplementaryCharacters()
{
    // Straight quotes interleaved with supplementary characters; the expected HTML
    // shows which quotes become &ldquo; and which become &rdquo;.
    const string markdown = "\"𝜵\"𠮷\"𝜵\"𩸽\"";
    const string expectedHtml = "<p>&ldquo;𝜵&ldquo;𠮷&rdquo;𝜵&ldquo;𩸽&rdquo;</p>";

    var builder = new MarkdownPipelineBuilder();
    var pipeline = builder.UseSmartyPants().Build();

    TestParser.TestSpec(markdown, expectedHtml, pipeline);
}
}
6 changes: 3 additions & 3 deletions src/Markdig/Extensions/SmartyPants/SmartyPantsInlineParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice)
// -- – &ndash; 'ndash'
// --- — &mdash; 'mdash'

var pc = slice.PeekCharExtra(-1);
var pc = slice.PeekRuneExtra(-1);
var c = slice.CurrentChar;
var openingChar = c;

Expand Down Expand Up @@ -93,9 +93,9 @@ public override bool Match(InlineProcessor processor, ref StringSlice slice)
}

// Skip char
c = slice.NextChar();
var next = slice.NextRune();

CharHelper.CheckOpenCloseDelimiter(pc, c, false, out bool canOpen, out bool canClose);
CharHelper.CheckOpenCloseDelimiter(pc, next, false, out bool canOpen, out bool canClose);

bool postProcess = false;

Expand Down
68 changes: 59 additions & 9 deletions src/Markdig/Helpers/CharHelper.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.

using System.Buffers;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Text;

namespace Markdig.Helpers;

Expand Down Expand Up @@ -69,10 +70,10 @@ public static class CharHelper
private static readonly SearchValues<char> s_escapableSymbolChars = SearchValues.Create("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~•");

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsPunctuationException(char c) =>
c is '−' or '-' or '†' or '‡';
// Returns true only for a BMP rune whose single UTF-16 code unit is one of the
// four characters listed below; supplementary runes always yield false.
private static bool IsPunctuationException(Rune c)
{
    if (!c.IsBmp)
    {
        return false;
    }

    return (char)c.Value is '−' or '-' or '†' or '‡';
}

public static void CheckOpenCloseDelimiter(char pc, char c, bool enableWithinWord, out bool canOpen, out bool canClose)
public static void CheckOpenCloseDelimiter(Rune pc, Rune c, bool enableWithinWord, out bool canOpen, out bool canClose)
{
pc.CheckUnicodeCategory(out bool prevIsWhiteSpace, out bool prevIsPunctuation);
c.CheckUnicodeCategory(out bool nextIsWhiteSpace, out bool nextIsPunctuation);
Expand Down Expand Up @@ -100,13 +101,13 @@ public static void CheckOpenCloseDelimiter(char pc, char c, bool enableWithinWor
if (!enableWithinWord)
{
var temp = canOpen;
// A single _ character can open emphasis iff it is part of a left-flanking delimiter run and either
// (a) not part of a right-flanking delimiter run or
// A single _ character can open emphasis iff it is part of a left-flanking delimiter run and either
// (a) not part of a right-flanking delimiter run or
// (b) part of a right-flanking delimiter run preceded by punctuation.
canOpen = canOpen && (!canClose || prevIsPunctuation);

// A single _ character can close emphasis iff it is part of a right-flanking delimiter run and either
// (a) not part of a left-flanking delimiter run or
// (a) not part of a left-flanking delimiter run or
// (b) part of a left-flanking delimiter run followed by punctuation.
canClose = canClose && (!temp || nextIsPunctuation);
}
Expand Down Expand Up @@ -199,6 +200,9 @@ public static bool IsWhitespace(this char c)
return IsWhitespaceRare(c);
}

/// <summary>
/// Determines whether a rune is whitespace by delegating to the <see cref="char"/> overload.
/// Runes outside the Basic Multilingual Plane are never reported as whitespace.
/// </summary>
/// <param name="r">The rune to test.</param>
/// <returns><c>true</c> if the rune is in the BMP and its code unit is whitespace; otherwise <c>false</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsWhitespace(this Rune r)
{
    if (!r.IsBmp)
    {
        return false;
    }

    return IsWhitespace((char)r.Value);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsWhiteSpaceOrZero(this char c)
{
Expand Down Expand Up @@ -263,6 +267,52 @@ public static void CheckUnicodeCategory(this char c, out bool space, out bool pu
}
}

#if !(NETSTANDARD2_1_OR_GREATER || NETCOREAPP2_1_OR_GREATER)
// Lazily created delegate for a non-public static method named "GetUnicodeCategory"
// looked up on System.Char via reflection. The lazy value is null when the lookup
// finds no such method. The field is never reassigned, so it is declared readonly.
private static readonly Lazy<Func<int, UnicodeCategory>?> GetUnicodeCategoryReflection =
    new(() => (Func<int, UnicodeCategory>?)typeof(char)
        .GetMethod("GetUnicodeCategory", System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static)
        ?.CreateDelegate(typeof(Func<int, UnicodeCategory>)));
#endif

/// <summary>
/// Classifies a rune as whitespace and/or punctuation.
/// </summary>
/// <param name="c">The rune to classify.</param>
/// <param name="space">Set to <c>true</c> when the rune is whitespace or U+0000.</param>
/// <param name="punctuation">Set to <c>true</c> when the rune counts as punctuation.</param>
public static void CheckUnicodeCategory(this Rune c, out bool space, out bool punctuation)
{
    if (IsWhitespace(c))
    {
        space = true;
        punctuation = false;
        return;
    }

    int codePoint = c.Value;
    if (codePoint <= 127)
    {
        // ASCII range: U+0000 is reported as a space; punctuation is decided by the ASCII helper.
        space = codePoint == 0;
        punctuation = c.IsBmp && IsAsciiPunctuationOrZero((char)codePoint);
        return;
    }

    // Non-ASCII: test the Unicode general category against the punctuation category mask.
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(codePoint);
#else
    UnicodeCategory category = GetUnicodeCategoryFallback(c);
#endif
    space = false;
    punctuation = (CommonMarkPunctuationCategoryMask & (1 << (int)category)) != 0;

#if !(NETSTANDARD2_1_OR_GREATER || NETCOREAPP2_1_OR_GREATER)
    // Fallback for targets where the int-based lookup is not compiled in:
    // BMP runes use the char overload, supplementary runes try the reflected
    // delegate first and finally the string-based overload.
    static UnicodeCategory GetUnicodeCategoryFallback(Rune rune)
    {
        if (rune.IsBmp)
        {
            return CharUnicodeInfo.GetUnicodeCategory((char)rune.Value);
        }

        if (GetUnicodeCategoryReflection.Value is Func<int, UnicodeCategory> lookup)
        {
            return lookup(rune.Value);
        }

        return CharUnicodeInfo.GetUnicodeCategory(rune.ToString(), 0);
    }
#endif
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static bool IsSpaceOrPunctuationForGFMAutoLink(char c)
{
Expand Down Expand Up @@ -309,15 +359,15 @@ public static bool IsZero(this char c)
/// <summary>
/// Determines whether the character is the space character U+0020.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> if <paramref name="c"/> is <c>' '</c>; otherwise <c>false</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsSpace(this char c)
{
// 2.1 Characters and lines
// 2.1 Characters and lines
// A space is U+0020.
return c == ' ';
}

/// <summary>
/// Determines whether the character is the horizontal tab character U+0009.
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> if <paramref name="c"/> is <c>'\t'</c>; otherwise <c>false</c>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsTab(this char c)
{
// 2.1 Characters and lines
// 2.1 Characters and lines
// A tab is U+0009.
return c == '\t';
}
Expand Down
136 changes: 134 additions & 2 deletions src/Markdig/Helpers/StringSlice.cs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.

#nullable disable

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;

namespace Markdig.Helpers;

Expand Down Expand Up @@ -114,7 +115,7 @@ internal StringSlice(string text, int start, int end, NewLine newLine, bool dumm
public NewLine NewLine;

/// <summary>
/// Gets the current character.
/// Gets the current character .
/// </summary>
public readonly char CurrentChar
{
Expand All @@ -125,6 +126,31 @@ public readonly char CurrentChar
}
}

/// <summary>
/// Gets the rune (Unicode scalar value) at the <see cref="Start"/> position, combining a
/// surrogate pair into a single rune. Returns the default rune when the slice is exhausted
/// or when the code unit at <see cref="Start"/> is a surrogate without a matching partner.
/// </summary>
public readonly Rune CurrentRune
{
    get
    {
        int position = Start;
        if (position > End)
        {
            return default;
        }

        char current = Text[position];

        if (char.IsHighSurrogate(current))
        {
            // The low half must follow and still be inside the slice.
            if (position + 1 > End)
            {
                return default;
            }

            char low = Text[position + 1];
            return char.IsLowSurrogate(low) ? new Rune(current, low) : default;
        }

        if (char.IsLowSurrogate(current))
        {
            // Start sits on the second half of a pair: look one code unit back in Text.
            if (position < 1)
            {
                return default;
            }

            char high = Text[position - 1];
            return char.IsHighSurrogate(high) ? new Rune(high, current) : default;
        }

        return new Rune(current);
    }
}

/// <summary>
/// Gets a value indicating whether this instance is empty.
/// </summary>
Expand All @@ -145,6 +171,35 @@ public readonly char this[int index]
get => Text[index];
}

/// <summary>
/// Gets the Unicode scalar value (rune) at the specified index.
/// Recognizes supplementary code points that cannot be covered by a single character.
/// </summary>
/// <param name="index">
/// The position of the code unit to decode. It is used directly as an index into <see cref="Text"/>
/// (the same convention as the indexer), not as an offset from <see cref="Start"/>.
/// </param>
/// <returns>
/// The rune at the specified index, or the default value (refers to <c>'\0'</c>) if the code unit
/// is a surrogate whose partner is missing or lies outside the slice.
/// </returns>
/// <remarks>
/// The index itself is not range-checked; an index outside <see cref="Text"/> makes the string indexer throw.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Rune RuneAt(int index)
{
    var first = Text[index];
    if (!char.IsSurrogate(first))
        return new Rune(first);

    if (char.IsHighSurrogate(first))
    {
        // A high surrogate can only be completed by the following code unit.
        // It must never fall through to the look-behind path, where it would be
        // passed to the Rune constructor as a low surrogate and make it throw.
        if (index + 1 <= End)
        {
            var second = Text[index + 1];
            if (char.IsLowSurrogate(second))
                return new Rune(first, second);
        }
        return default;
    }

    // Low surrogate: its high half is the preceding code unit, which must be inside the slice.
    if (index >= Start + 1)
    {
        var trueFirst = Text[index - 1];
        if (char.IsHighSurrogate(trueFirst))
            return new Rune(trueFirst, first);
    }
    return default;
}


/// <summary>
/// Goes to the next character, incrementing the <see cref="Start" /> position.
Expand All @@ -166,6 +221,36 @@ public char NextChar()
return Text[start];
}

/// <summary>
/// Advances <see cref="Start"/> by one code unit and decodes the rune found there.
/// When that code unit is a high surrogate followed by a low surrogate inside the slice,
/// <see cref="Start"/> is advanced once more so that it rests on the low surrogate.
/// </summary>
/// <returns>
/// The next rune. Returns default when the slice is exhausted (in which case
/// <see cref="Start"/> is set to <c>End + 1</c>) or when the next code unit is an unpaired surrogate.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Rune NextRune()
{
    int position = Start;
    if (position >= End)
    {
        Start = End + 1;
        return default;
    }

    position++;
    Start = position;

    char lead = Text[position];
    if (!char.IsSurrogate(lead))
    {
        return new Rune(lead);
    }

    if (char.IsHighSurrogate(lead) && position + 1 <= End)
    {
        char trail = Text[position + 1];
        if (char.IsLowSurrogate(trail))
        {
            // Consume the second half of the pair as well.
            Start = position + 1;
            return new Rune(lead, trail);
        }
    }

    return default;
}

/// <summary>
/// Goes to the next character, incrementing the <see cref="Start" /> position.
/// </summary>
Expand Down Expand Up @@ -244,6 +329,53 @@ public readonly char PeekCharExtra(int offset)
return (uint)index < (uint)text.Length ? text[index] : '\0';
}

/// <summary>
/// Peeks a rune at the specified offset from the current beginning of the slice
/// without using the range <see cref="Start"/> or <see cref="End"/>, returns default if outside the <see cref="Text"/>.
/// Recognizes supplementary code points that cannot be covered by a single character.
/// </summary>
/// <param name="offset">The offset, in UTF-16 code units, added to <see cref="Start"/>.</param>
/// <returns>
/// The rune at the specified offset. Returns default if the position is outside <see cref="Text"/>
/// or if the code unit there is a surrogate without a matching partner next to it.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly Rune PeekRuneExtra(int offset)
{
    var index = Start + offset;
    var text = Text;
    if ((uint)index >= (uint)text.Length)
    {
        return default;
    }

    var resultOrLowSurrogate = text[index];
    if (!char.IsSurrogate(resultOrLowSurrogate))
    {
        return new Rune(resultOrLowSurrogate);
    }

    if (char.IsHighSurrogate(resultOrLowSurrogate))
    {
        // First half of a pair: the low surrogate must be the next code unit.
        if (index + 1 >= text.Length)
        {
            return default;
        }
        var lowSurrogate = text[index + 1];
        if (!char.IsLowSurrogate(lowSurrogate))
        {
            return default;
        }
        return new Rune(resultOrLowSurrogate, lowSurrogate);
    }

    // Second half of a pair (typical when peeking backwards): the high surrogate
    // is the previous code unit, which exists for every index >= 1.
    if (index < 1)
    {
        return default;
    }
    var highSurrogate = text[index - 1];
    if (!char.IsHighSurrogate(highSurrogate))
    {
        return default;
    }
    return new Rune(highSurrogate, resultOrLowSurrogate);
}

/// <summary>
/// Matches the specified text.
/// </summary>
Expand Down
4 changes: 4 additions & 0 deletions src/Markdig/Markdig.targets
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@
<PackageReference Include="System.Memory" Version="4.6.0" />
</ItemGroup>

<ItemGroup Condition=" '$(TargetFramework)' == 'net462' OR '$(TargetFramework)' == 'netstandard2.0' OR '$(TargetFramework)' == 'netstandard2.1'">
<PackageReference Include="Shim.System.Text.Rune" Version="6.0.2" />
</ItemGroup>

<ItemGroup>
<None Include="../../img/markdig.png" Pack="true" PackagePath="" />
<None Include="../../readme.md" Pack="true" PackagePath="/"/>
Expand Down
Loading
Loading