@@ -79,6 +79,7 @@ string to_string(ScannerError _errorCode)
79
79
case ScannerError::IllegalExponent: return " Invalid exponent." ;
80
80
case ScannerError::IllegalNumberEnd: return " Identifier-start is not allowed at end of a number." ;
81
81
case ScannerError::OctalNotAllowed: return " Octal numbers not allowed." ;
82
+ case ScannerError::MismatchingDirectionalOverridesInComment: return " Mismatching directional override markers in comment." ;
82
83
default :
83
84
solAssert (false , " Unhandled case in to_string(ScannerError)" );
84
85
return " " ;
@@ -273,10 +274,29 @@ bool Scanner::skipWhitespaceExceptUnicodeLinebreak()
273
274
274
275
// Skips over the remainder of a single-line comment while checking that the
// Unicode directional override markers inside it are properly paired.
//
// Every LRO (U+202D) or RLO (U+202E) has to be closed by a later PDF (U+202C)
// within the same comment, and a PDF must not appear without a preceding,
// still-open override.
//
// Returns Token::Whitespace for a well-formed comment. Otherwise returns the
// result of setError(ScannerError::MismatchingDirectionalOverridesInComment).
// In both cases scanning runs up to the line terminator (or until advance()
// reports failure) before the result is decided.
Token Scanner::skipSingleLineComment()
{
	// Number of LRO/RLO markers seen so far that have not been closed by a PDF.
	int rtlOverrideDepth = 0;
	// Remembers that a PDF appeared while no override was open. A later
	// LRO/RLO would bring the counter back to zero, so checking the final
	// depth alone would wrongly accept sequences such as "<PDF> ... <RLO>".
	bool rtlOverrideUnderflow = false;

	// Line terminator is not part of the comment. If it is a
	// non-ascii line terminator, it will result in a parser error.
	while (!isUnicodeLinebreak())
	{
		if (
			tryScanByteSequence("\xE2\x80\xAD") || // U+202D (LRO - Left-to-Right Override)
			tryScanByteSequence("\xE2\x80\xAE")    // U+202E (RLO - Right-to-Left Override)
		)
			++rtlOverrideDepth;
		else if (tryScanByteSequence("\xE2\x80\xAC")) // U+202C (PDF - Pop Directional Formatting)
		{
			--rtlOverrideDepth;
			if (rtlOverrideDepth < 0)
				rtlOverrideUnderflow = true;
		}
		else if (!advance())
			break;
	}

	// Unbalanced RLO/LRO/PDF codepoint sequences in comment: either an override
	// was left open, or a PDF had nothing to close.
	if (rtlOverrideUnderflow || rtlOverrideDepth != 0)
		return setError(ScannerError::MismatchingDirectionalOverridesInComment);

	return Token::Whitespace;
}
@@ -349,18 +369,36 @@ size_t Scanner::scanSingleLineDocComment()
349
369
350
370
Token Scanner::skipMultiLineComment ()
351
371
{
372
+ int rtlOverrideDepth = 0 ;
352
373
while (!isSourcePastEndOfInput ())
353
374
{
354
- char ch = m_char;
355
- advance ();
356
-
357
- // If we have reached the end of the multi-line comment, we
358
- // consume the '/' and insert a whitespace. This way all
359
- // multi-line comments are treated as whitespace.
360
- if (ch == ' *' && m_char == ' /' )
375
+ if (tryScanByteSequence (" \xE2\x80\xAD " ) || // U+202D (LRO - Left-to-Right Override)
376
+ tryScanByteSequence (" \xE2\x80\xAE " ) // U+202E (RLO - Right-to-Left Override)
377
+ )
361
378
{
362
- m_char = ' ' ;
363
- return Token::Whitespace;
379
+ rtlOverrideDepth++;
380
+ }
381
+ else if (tryScanByteSequence (" \xE2\x80\xAC " )) // U+202C (PDF - Pop Directional Formatting)
382
+ {
383
+ rtlOverrideDepth--;
384
+ }
385
+ else
386
+ {
387
+ char ch = m_char;
388
+ advance ();
389
+
390
+ // If we have reached the end of the multi-line comment, we
391
+ // consume the '/' and insert a whitespace. This way all
392
+ // multi-line comments are treated as whitespace.
393
+ if (ch == ' *' && m_char == ' /' )
394
+ {
395
+ if (rtlOverrideDepth != 0 )
396
+ // Unbalanced RLO/LRO/PDF codepoint sequences in comment.
397
+ return setError (ScannerError::MismatchingDirectionalOverridesInComment);
398
+
399
+ m_char = ' ' ;
400
+ return Token::Whitespace;
401
+ }
364
402
}
365
403
}
366
404
// Unterminated multi-line comment.
0 commit comments