@@ -28,13 +28,6 @@ function expectSyntaxError(text: string) {
2828}
2929
3030describe ( 'Lexer' , ( ) => {
31- it ( 'disallows uncommon control characters' , ( ) => {
32- expectSyntaxError ( '\u0007' ) . to . deep . equal ( {
33- message : 'Syntax Error: Invalid character: U+0007.' ,
34- locations : [ { line : 1 , column : 1 } ] ,
35- } ) ;
36- } ) ;
37-
3831 it ( 'ignores BOM header' , ( ) => {
3932 expect ( lexOne ( '\uFEFF foo' ) ) . to . contain ( {
4033 kind : TokenKind . NAME ,
@@ -264,12 +257,98 @@ describe('Lexer', () => {
264257 value : 'slashes \\ /' ,
265258 } ) ;
266259
260+ expect ( lexOne ( '"unescaped unicode outside BMP \u{1f600}"' ) ) . to . contain ( {
261+ kind : TokenKind . STRING ,
262+ start : 0 ,
263+ end : 34 ,
264+ value : 'unescaped unicode outside BMP \u{1f600}' ,
265+ } ) ;
266+
267+ expect (
268+ lexOne ( '"unescaped maximal unicode outside BMP \u{10ffff}"' ) ,
269+ ) . to . contain ( {
270+ kind : TokenKind . STRING ,
271+ start : 0 ,
272+ end : 42 ,
273+ value : 'unescaped maximal unicode outside BMP \u{10ffff}' ,
274+ } ) ;
275+
267276 expect ( lexOne ( '"unicode \\u1234\\u5678\\u90AB\\uCDEF"' ) ) . to . contain ( {
268277 kind : TokenKind . STRING ,
269278 start : 0 ,
270279 end : 34 ,
271280 value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
272281 } ) ;
282+
283+ expect ( lexOne ( '"unicode \\u{1234}\\u{5678}\\u{90AB}\\u{CDEF}"' ) ) . to . contain (
284+ {
285+ kind : TokenKind . STRING ,
286+ start : 0 ,
287+ end : 42 ,
288+ value : 'unicode \u1234\u5678\u90AB\uCDEF' ,
289+ } ,
290+ ) ;
291+
292+ expect (
293+ lexOne ( '"string with unicode escape outside BMP \\u{1F600}"' ) ,
294+ ) . to . contain ( {
295+ kind : TokenKind . STRING ,
296+ start : 0 ,
297+ end : 50 ,
298+ value : 'string with unicode escape outside BMP \u{1f600}' ,
299+ } ) ;
300+
301+ expect ( lexOne ( '"string with minimal unicode escape \\u{0}"' ) ) . to . contain ( {
302+ kind : TokenKind . STRING ,
303+ start : 0 ,
304+ end : 42 ,
305+ value : 'string with minimal unicode escape \u{0}' ,
306+ } ) ;
307+
308+ expect (
309+ lexOne ( '"string with maximal unicode escape \\u{10FFFF}"' ) ,
310+ ) . to . contain ( {
311+ kind : TokenKind . STRING ,
312+ start : 0 ,
313+ end : 47 ,
314+ value : 'string with maximal unicode escape \u{10FFFF}' ,
315+ } ) ;
316+
317+ expect (
318+ lexOne ( '"string with maximal minimal unicode escape \\u{00000000}"' ) ,
319+ ) . to . contain ( {
320+ kind : TokenKind . STRING ,
321+ start : 0 ,
322+ end : 57 ,
323+ value : 'string with maximal minimal unicode escape \u{0}' ,
324+ } ) ;
325+
326+ expect (
327+ lexOne ( '"string with unicode surrogate pair escape \\uD83D\\uDE00"' ) ,
328+ ) . to . contain ( {
329+ kind : TokenKind . STRING ,
330+ start : 0 ,
331+ end : 56 ,
332+ value : 'string with unicode surrogate pair escape \u{1f600}' ,
333+ } ) ;
334+
335+ expect (
336+ lexOne ( '"string with minimal surrogate pair escape \\uD800\\uDC00"' ) ,
337+ ) . to . contain ( {
338+ kind : TokenKind . STRING ,
339+ start : 0 ,
340+ end : 56 ,
341+ value : 'string with minimal surrogate pair escape \u{10000}' ,
342+ } ) ;
343+
344+ expect (
345+ lexOne ( '"string with maximal surrogate pair escape \\uDBFF\\uDFFF"' ) ,
346+ ) . to . contain ( {
347+ kind : TokenKind . STRING ,
348+ start : 0 ,
349+ end : 56 ,
350+ value : 'string with maximal surrogate pair escape \u{10FFFF}' ,
351+ } ) ;
273352 } ) ;
274353
275354 it ( 'lex reports useful string errors' , ( ) => {
@@ -299,16 +378,19 @@ describe('Lexer', () => {
299378 locations : [ { line : 1 , column : 1 } ] ,
300379 } ) ;
301380
302- expectSyntaxError ( '"contains unescaped \u0007 control char"' ) . to . deep . equal (
303- {
304- message : 'Syntax Error: Invalid character within String: U+0007.' ,
305- locations : [ { line : 1 , column : 21 } ] ,
306- } ,
307- ) ;
381+ expectSyntaxError ( '"bad surrogate \uDEAD"' ) . to . deep . equal ( {
382+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
383+ locations : [ { line : 1 , column : 16 } ] ,
384+ } ) ;
385+
386+ expectSyntaxError ( '"bad high surrogate pair \uDEAD\uDEAD"' ) . to . deep . equal ( {
387+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
388+ locations : [ { line : 1 , column : 26 } ] ,
389+ } ) ;
308390
309- expectSyntaxError ( '"null-byte is not \u0000 end of file"' ) . to . deep . equal ( {
310- message : 'Syntax Error: Invalid character within String: U+0000.' ,
311- locations : [ { line : 1 , column : 19 } ] ,
391+ expectSyntaxError ( '"bad low surrogate pair \uD800\uD800"' ) . to . deep . equal ( {
392+ message : 'Syntax Error: Invalid character within String: U+D800.' ,
393+ locations : [ { line : 1 , column : 25 } ] ,
312394 } ) ;
313395
314396 expectSyntaxError ( '"multi\nline"' ) . to . deep . equal ( {
@@ -355,6 +437,93 @@ describe('Lexer', () => {
355437 message : 'Syntax Error: Invalid Unicode escape sequence: "\\uXXXF".' ,
356438 locations : [ { line : 1 , column : 6 } ] ,
357439 } ) ;
440+
441+ expectSyntaxError ( '"bad \\u{} esc"' ) . to . deep . equal ( {
442+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{}".' ,
443+ locations : [ { line : 1 , column : 6 } ] ,
444+ } ) ;
445+
446+ expectSyntaxError ( '"bad \\u{FXXX} esc"' ) . to . deep . equal ( {
447+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FX".' ,
448+ locations : [ { line : 1 , column : 6 } ] ,
449+ } ) ;
450+
451+ expectSyntaxError ( '"bad \\u{FFFF esc"' ) . to . deep . equal ( {
452+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF ".' ,
453+ locations : [ { line : 1 , column : 6 } ] ,
454+ } ) ;
455+
456+ expectSyntaxError ( '"bad \\u{FFFF"' ) . to . deep . equal ( {
457+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{FFFF"".' ,
458+ locations : [ { line : 1 , column : 6 } ] ,
459+ } ) ;
460+
461+ expectSyntaxError ( '"too high \\u{110000} esc"' ) . to . deep . equal ( {
462+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{110000}".' ,
463+ locations : [ { line : 1 , column : 11 } ] ,
464+ } ) ;
465+
466+ expectSyntaxError ( '"way too high \\u{12345678} esc"' ) . to . deep . equal ( {
467+ message :
468+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{12345678}".' ,
469+ locations : [ { line : 1 , column : 15 } ] ,
470+ } ) ;
471+
472+ expectSyntaxError ( '"too long \\u{000000000} esc"' ) . to . deep . equal ( {
473+ message :
474+ 'Syntax Error: Invalid Unicode escape sequence: "\\u{000000000".' ,
475+ locations : [ { line : 1 , column : 11 } ] ,
476+ } ) ;
477+
478+ expectSyntaxError ( '"bad surrogate \\uDEAD esc"' ) . to . deep . equal ( {
479+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
480+ locations : [ { line : 1 , column : 16 } ] ,
481+ } ) ;
482+
483+ expectSyntaxError ( '"bad surrogate \\u{DEAD} esc"' ) . to . deep . equal ( {
484+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{DEAD}".' ,
485+ locations : [ { line : 1 , column : 16 } ] ,
486+ } ) ;
487+
488+ expectSyntaxError (
489+ '"cannot use braces for surrogate pair \\u{D83D}\\u{DE00} esc"' ,
490+ ) . to . deep . equal ( {
491+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\u{D83D}".' ,
492+ locations : [ { line : 1 , column : 39 } ] ,
493+ } ) ;
494+
495+ expectSyntaxError (
496+ '"bad high surrogate pair \\uDEAD\\uDEAD esc"' ,
497+ ) . to . deep . equal ( {
498+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uDEAD".' ,
499+ locations : [ { line : 1 , column : 26 } ] ,
500+ } ) ;
501+
502+ expectSyntaxError (
503+ '"bad low surrogate pair \\uD800\\uD800 esc"' ,
504+ ) . to . deep . equal ( {
505+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD800".' ,
506+ locations : [ { line : 1 , column : 25 } ] ,
507+ } ) ;
508+
509+ expectSyntaxError (
510+ '"cannot escape half a pair \uD83D\\uDE00 esc"' ,
511+ ) . to . deep . equal ( {
512+ message : 'Syntax Error: Invalid character within String: U+D83D.' ,
513+ locations : [ { line : 1 , column : 28 } ] ,
514+ } ) ;
515+
516+ expectSyntaxError (
517+ '"cannot escape half a pair \\uD83D\uDE00 esc"' ,
518+ ) . to . deep . equal ( {
519+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
520+ locations : [ { line : 1 , column : 28 } ] ,
521+ } ) ;
522+
523+ expectSyntaxError ( '"bad \\uD83D\\not an escape"' ) . to . deep . equal ( {
524+ message : 'Syntax Error: Invalid Unicode escape sequence: "\\uD83D".' ,
525+ locations : [ { line : 1 , column : 6 } ] ,
526+ } ) ;
358527 } ) ;
359528
360529 it ( 'lexes block strings' , ( ) => {
@@ -414,6 +583,13 @@ describe('Lexer', () => {
414583 value : 'unescaped \\n\\r\\b\\t\\f\\u1234' ,
415584 } ) ;
416585
586+ expect ( lexOne ( '"""unescaped unicode outside BMP \u{1f600}"""' ) ) . to . contain ( {
587+ kind : TokenKind . BLOCK_STRING ,
588+ start : 0 ,
589+ end : 38 ,
590+ value : 'unescaped unicode outside BMP \u{1f600}' ,
591+ } ) ;
592+
417593 expect ( lexOne ( '"""slashes \\\\ \\/"""' ) ) . to . contain ( {
418594 kind : TokenKind . BLOCK_STRING ,
419595 start : 0 ,
@@ -486,18 +662,9 @@ describe('Lexer', () => {
486662 locations : [ { line : 1 , column : 16 } ] ,
487663 } ) ;
488664
489- expectSyntaxError (
490- '"""contains unescaped \u0007 control char"""' ,
491- ) . to . deep . equal ( {
492- message : 'Syntax Error: Invalid character within String: U+0007.' ,
493- locations : [ { line : 1 , column : 23 } ] ,
494- } ) ;
495-
496- expectSyntaxError (
497- '"""null-byte is not \u0000 end of file"""' ,
498- ) . to . deep . equal ( {
499- message : 'Syntax Error: Invalid character within String: U+0000.' ,
500- locations : [ { line : 1 , column : 21 } ] ,
665+ expectSyntaxError ( '"""contains invalid surrogate \uDEAD"""' ) . to . deep . equal ( {
666+ message : 'Syntax Error: Invalid character within String: U+DEAD.' ,
667+ locations : [ { line : 1 , column : 31 } ] ,
501668 } ) ;
502669 } ) ;
503670
@@ -837,6 +1004,16 @@ describe('Lexer', () => {
8371004 locations : [ { line : 1 , column : 1 } ] ,
8381005 } ) ;
8391006
1007+ expectSyntaxError ( '\x00' ) . to . deep . equal ( {
1008+ message : 'Syntax Error: Unexpected character: U+0000.' ,
1009+ locations : [ { line : 1 , column : 1 } ] ,
1010+ } ) ;
1011+
1012+ expectSyntaxError ( '\b' ) . to . deep . equal ( {
1013+ message : 'Syntax Error: Unexpected character: U+0008.' ,
1014+ locations : [ { line : 1 , column : 1 } ] ,
1015+ } ) ;
1016+
8401017 expectSyntaxError ( '\u00AA' ) . to . deep . equal ( {
8411018 message : 'Syntax Error: Unexpected character: U+00AA.' ,
8421019 locations : [ { line : 1 , column : 1 } ] ,
@@ -851,6 +1028,16 @@ describe('Lexer', () => {
8511028 message : 'Syntax Error: Unexpected character: U+203B.' ,
8521029 locations : [ { line : 1 , column : 1 } ] ,
8531030 } ) ;
1031+
1032+ expectSyntaxError ( '\u{1f600}' ) . to . deep . equal ( {
1033+ message : 'Syntax Error: Unexpected character: U+1F600.' ,
1034+ locations : [ { line : 1 , column : 1 } ] ,
1035+ } ) ;
1036+
1037+ expectSyntaxError ( '\uDEAD' ) . to . deep . equal ( {
1038+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1039+ locations : [ { line : 1 , column : 1 } ] ,
1040+ } ) ;
8541041 } ) ;
8551042
8561043 it ( 'lex reports useful information for dashes in names' , ( ) => {
@@ -931,9 +1118,15 @@ describe('Lexer', () => {
9311118 end : 9 ,
9321119 value : ' Comment' ,
9331120 } ) ;
934- expectSyntaxError ( '# \u0007' ) . to . deep . equal ( {
935- message : 'Syntax Error: Invalid character: U+0007.' ,
936- locations : [ { line : 1 , column : 3 } ] ,
1121+ expect ( lexOne ( '# Comment \u{1f600}' ) . prev ) . to . contain ( {
1122+ kind : TokenKind . COMMENT ,
1123+ start : 0 ,
1124+ end : 12 ,
1125+ value : ' Comment \u{1f600}' ,
1126+ } ) ;
1127+ expectSyntaxError ( '# Invalid surrogate \uDEAD' ) . to . deep . equal ( {
1128+ message : 'Syntax Error: Invalid character: U+DEAD.' ,
1129+ locations : [ { line : 1 , column : 21 } ] ,
9371130 } ) ;
9381131 } ) ;
9391132} ) ;
0 commit comments