@@ -79,21 +79,6 @@ StateInline.prototype.push = function (type, tag, nesting) {
7979 return token
8080}
8181
82- // Return the code point that ends just before `pos` in `str`, joining a surrogate pair when a valid one ends there; an unpaired surrogate is returned as its raw code unit.
83- function getLastCharCode ( str , pos ) {
84- // no preceding character (pos at or before the start): treat beginning of the line as whitespace and report a space
85- if ( pos <= 0 ) { return 0x20 }
86- const charCode = str . charCodeAt ( pos - 1 )
87- // anything but a low surrogate (0xDC00-0xDFFF) cannot be the tail of a pair: return the code unit as-is (this includes a lone high surrogate)
88- if ( ( charCode & 0xFC00 ) !== 0xDC00 ) { return charCode }
89-
90- // the unit is a low surrogate: read the code point that starts one unit earlier.
91- // codePointAt returns undefined when pos - 2 is out of range (isolated low surrogate at the very beginning of the string)
92- const codePoint = str . codePointAt ( pos - 2 )
93- // only a well-formed pair yields a value above 0xffff; undefined > 0xffff is false, so an unpaired low surrogate falls back to the raw code unit without an extra check
94- return codePoint > 0xffff ? codePoint : charCode
95- }
96-
9782// Scan a sequence of emphasis-like markers, and determine whether
9883// it can start an emphasis sequence or end an emphasis sequence.
9984//
@@ -104,15 +89,47 @@ StateInline.prototype.scanDelims = function (start, canSplitWord) {
10489 const max = this . posMax
10590 const marker = this . src . charCodeAt ( start )
10691
107- const lastChar = getLastCharCode ( this . src , start )
92+ // Astral characters below are combined manually because .codePointAt()
93+ // can return undefined rather than a number, and we want to avoid JIT cache
94+ // issues. Broken surrogate pairs are treated as U+FFFD to prevent possible
95+ // crashes.
96+
97+ let lastChar
98+ if ( start === 0 ) {
99+ // treat beginning of the line as a whitespace
100+ lastChar = 0x20
101+ } else if ( start === 1 ) {
102+ lastChar = this . src . charCodeAt ( 0 )
103+ if ( ( lastChar & 0xF800 ) === 0xD800 ) { lastChar = 0xFFFD }
104+ } else {
105+ lastChar = this . src . charCodeAt ( start - 1 )
106+ if ( ( lastChar & 0xFC00 ) === 0xDC00 ) {
107+ // low surrogate => add high one, replace broken pair with U+FFFD
108+ const highSurr = this . src . charCodeAt ( start - 2 )
109+ lastChar = ( highSurr & 0xFC00 ) === 0xD800
110+ ? 0x10000 + ( ( highSurr - 0xD800 ) << 10 ) + ( lastChar - 0xDC00 )
111+ : 0xFFFD
112+ } else if ( ( lastChar & 0xFC00 ) === 0xD800 ) {
113+ lastChar = 0xFFFD
114+ }
115+ }
108116
109117 let pos = start
110118 while ( pos < max && this . src . charCodeAt ( pos ) === marker ) { pos ++ }
111119
112120 const count = pos - start
113121
114122 // treat end of the line as a whitespace
115- const nextChar = pos < max ? this . src . codePointAt ( pos ) : 0x20
123+ let nextChar = pos < max ? this . src . charCodeAt ( pos ) : 0x20
124+ if ( ( nextChar & 0xFC00 ) === 0xD800 ) {
125+ // high surrogate => add low one, replace broken pair with U+FFFD
126+ const lowSurr = this . src . charCodeAt ( pos + 1 )
127+ nextChar = ( lowSurr & 0xFC00 ) === 0xDC00
128+ ? 0x10000 + ( ( nextChar - 0xD800 ) << 10 ) + ( lowSurr - 0xDC00 )
129+ : 0xFFFD
130+ } else if ( ( nextChar & 0xFC00 ) === 0xDC00 ) {
131+ nextChar = 0xFFFD
132+ }
116133
117134 const isLastPunctChar = isMdAsciiPunct ( lastChar ) || isPunctChar ( fromCodePoint ( lastChar ) )
118135 const isNextPunctChar = isMdAsciiPunct ( nextChar ) || isPunctChar ( fromCodePoint ( nextChar ) )
0 commit comments