11// Copyright (c) Alexandre Mutel. All rights reserved.
2- // This file is licensed under the BSD-Clause 2 license.
2+ // This file is licensed under the BSD-Clause 2 license.
33// See the license.txt file in the project root for more information.
44
55#nullable disable
66
77using System . Runtime . CompilerServices ;
88using System . Runtime . InteropServices ;
9+ using System . Text ;
910
1011namespace Markdig . Helpers ;
1112
@@ -114,7 +115,7 @@ internal StringSlice(string text, int start, int end, NewLine newLine, bool dumm
114115 public NewLine NewLine ;
115116
116117 /// <summary>
117- /// Gets the current character.
118+ /// Gets the current character.
118119 /// </summary>
119120 public readonly char CurrentChar
120121 {
@@ -125,6 +126,31 @@ public readonly char CurrentChar
125126 }
126127 }
127128
/// <summary>
/// Gets the rune (Unicode scalar value) located at the <see cref="Start"/> position.
/// A surrogate pair is combined into the single supplementary code point it encodes.
/// </summary>
/// <remarks>
/// The default rune is returned when <see cref="Start"/> is past <see cref="End"/>,
/// or when the surrogate found at <see cref="Start"/> has no matching partner.
/// </remarks>
public readonly Rune CurrentRune
{
    get
    {
        int position = Start;
        if (position > End)
        {
            return default;
        }

        char current = Text[position];

        if (char.IsHighSurrogate(current))
        {
            // The trailing half of the pair has to sit inside the slice as well.
            if (position >= End)
            {
                return default;
            }

            char following = Text[position + 1];
            return char.IsLowSurrogate(following) ? new Rune(current, following) : default;
        }

        if (char.IsLowSurrogate(current))
        {
            // Positioned on the trailing half of a pair: the leading half is the
            // character just before Start in Text, provided such a character exists.
            if (position <= 0)
            {
                return default;
            }

            char preceding = Text[position - 1];
            return char.IsHighSurrogate(preceding) ? new Rune(preceding, current) : default;
        }

        // Not a surrogate: the character is a complete scalar value on its own.
        return new Rune(current);
    }
}
153+
128154 /// <summary>
129155 /// Gets a value indicating whether this instance is empty.
130156 /// </summary>
@@ -145,6 +171,35 @@ public readonly char this[int index]
145171 get => Text [ index ] ;
146172 }
147173
/// <summary>
/// Gets the Unicode scalar value (rune) at the specified index.
/// Recognizes supplementary code points that cannot be covered by a single character.
/// </summary>
/// <param name="index">
/// The index of the character to inspect. It is used directly as an index into <see cref="Text"/>
/// (it is not offset by <see cref="Start"/>), and no range check is performed on it here.
/// </param>
/// <returns>
/// The rune at the specified index, or the default value (refers to <c>'\0'</c>) if the character
/// is a surrogate whose partner is missing or lies outside the
/// <see cref="Start"/>..<see cref="End"/> range.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly Rune RuneAt(int index)
{
    char first = Text[index];
    if (!char.IsSurrogate(first))
    {
        return new Rune(first);
    }

    if (char.IsHighSurrogate(first))
    {
        // A leading surrogate can only pair with the character that follows it inside the slice.
        // It must never fall through to the look-back below: pairing it with a preceding
        // high surrogate would make the Rune constructor throw.
        if (index + 1 <= End)
        {
            char second = Text[index + 1];
            if (char.IsLowSurrogate(second))
            {
                return new Rune(first, second);
            }
        }

        return default;
    }

    // 'first' is a trailing (low) surrogate: its leading half, if any, is the previous
    // character, which must still be inside the slice.
    if (index >= Start + 1)
    {
        char leading = Text[index - 1];
        if (char.IsHighSurrogate(leading))
        {
            return new Rune(leading, first);
        }
    }

    return default;
}
202+
148203
149204 /// <summary>
150205 /// Goes to the next character, incrementing the <see cref="Start" /> position.
@@ -166,6 +221,36 @@ public char NextChar()
166221 return Text [ start ] ;
167222 }
168223
/// <summary>
/// Advances the <see cref="Start"/> position and reads the rune found there.
/// </summary>
/// <returns>
/// The rune at the new position. Returns default when the slice is exhausted, or when the
/// character at the new position is a surrogate that does not begin a valid pair inside the slice.
/// </returns>
/// <remarks>
/// When a surrogate pair is read, <see cref="Start"/> is left on the trailing surrogate of the pair.
/// When default is returned for an unpaired surrogate, <see cref="Start"/> has still moved forward by one.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public Rune NextRune()
{
    int position = Start;
    if (position >= End)
    {
        // Nothing left to read: park Start just past End.
        Start = End + 1;
        return default;
    }

    position += 1;
    Start = position;

    char lead = Text[position];
    if (char.IsHighSurrogate(lead))
    {
        // Only combine when the trailing half is still inside the slice.
        if (position < End)
        {
            char trail = Text[position + 1];
            if (char.IsLowSurrogate(trail))
            {
                // Consume the trailing half too.
                Start = position + 1;
                return new Rune(lead, trail);
            }
        }

        return default;
    }

    // A lone trailing surrogate yields default; anything else is a complete scalar value.
    return char.IsLowSurrogate(lead) ? default : new Rune(lead);
}
253+
169254 /// <summary>
170255 /// Goes to the next character, incrementing the <see cref="Start" /> position.
171256 /// </summary>
@@ -244,6 +329,53 @@ public readonly char PeekCharExtra(int offset)
244329 return ( uint ) index < ( uint ) text . Length ? text [ index ] : '\0 ' ;
245330 }
246331
/// <summary>
/// Peeks a rune at the specified offset from the current beginning of the slice
/// without using the range <see cref="Start"/> or <see cref="End"/>, returns default if outside the <see cref="Text"/>.
/// Recognizes supplementary code points that cannot be covered by a single character.
/// </summary>
/// <param name="offset">The offset, added to <see cref="Start"/> to obtain the index into <see cref="Text"/>.</param>
/// <returns>
/// The rune at the specified offset. Returns default if the index is outside the text, or if the
/// character there is a surrogate without a matching partner next to it in the text.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public readonly Rune PeekRuneExtra(int offset)
{
    var index = Start + offset;
    var text = Text;

    // The unsigned comparison rejects negative indices and indices past the end in one test.
    if ((uint)index >= (uint)text.Length)
    {
        return default;
    }

    var current = text[index];
    if (!char.IsSurrogate(current))
    {
        return new Rune(current);
    }

    if (char.IsHighSurrogate(current))
    {
        // Leading half of a pair: the trailing half must follow it in the text.
        if (index + 1 >= text.Length)
        {
            return default;
        }

        var lowSurrogate = text[index + 1];
        return char.IsLowSurrogate(lowSurrogate) ? new Rune(current, lowSurrogate) : default;
    }

    // Trailing half of a pair: the leading half must precede it in the text.
    // Index 0 is the only position that has no predecessor.
    if (index < 1)
    {
        return default;
    }

    var highSurrogate = text[index - 1];
    return char.IsHighSurrogate(highSurrogate) ? new Rune(highSurrogate, current) : default;
}
378+
247379 /// <summary>
248380 /// Matches the specified text.
249381 /// </summary>
0 commit comments