7
7
8
8
open Js_token
9
9
10
+ module Lex_mode = struct
11
+ type t =
12
+ | NORMAL
13
+ | BACKQUOTE
14
+ | REGEXP
15
+ end
16
+
10
17
module Loc = struct
11
18
(* line numbers are 1-indexed; column numbers are 0-indexed *)
12
19
@@ -41,6 +48,7 @@ module Lex_env = struct
41
48
{ lex_source : string option
42
49
; lex_lb : Sedlexing .lexbuf
43
50
; lex_state : lex_state
51
+ ; lex_mode_stack : Lex_mode .t list
44
52
}
45
53
[@@ ocaml.warning "-69" ]
46
54
@@ -55,9 +63,24 @@ module Lex_env = struct
55
63
| "" -> None
56
64
| s -> Some s
57
65
in
58
- { lex_source; lex_lb; lex_state = empty_lex_state }
66
+ { lex_source
67
+ ; lex_lb
68
+ ; lex_state = empty_lex_state
69
+ ; lex_mode_stack = [ Lex_mode. NORMAL ]
70
+ }
59
71
end
60
72
73
+ let push_mode env mode =
74
+ { env with Lex_env. lex_mode_stack = mode :: env .Lex_env .lex_mode_stack }
75
+
76
+ let pop_mode env =
77
+ { env with
78
+ Lex_env. lex_mode_stack =
79
+ (match env.Lex_env. lex_mode_stack with
80
+ | [] -> []
81
+ | _ :: xs -> xs)
82
+ }
83
+
61
84
module Lex_result = struct
62
85
type t =
63
86
{ lex_token : Js_token .t
@@ -424,7 +447,8 @@ let rec string_quote env q buf lexbuf =
424
447
| '\\' , line_terminator_sequence -> string_quote env q buf lexbuf
425
448
| '\\' ->
426
449
let env, str = string_escape env lexbuf in
427
- if String. get q 0 <> String. get str 0 then Buffer. add_string buf " \\ " ;
450
+ if String. equal str " " || String. get q 0 <> String. get str 0
451
+ then Buffer. add_string buf " \\ " ;
428
452
Buffer. add_string buf str;
429
453
string_quote env q buf lexbuf
430
454
| '\n' ->
@@ -444,47 +468,6 @@ let rec string_quote env q buf lexbuf =
444
468
string_quote env q buf lexbuf
445
469
| _ -> failwith " unreachable string_quote"
446
470
447
- let rec template_part env cooked raw literal lexbuf =
448
- match % sedlex lexbuf with
449
- | eof ->
450
- let env = illegal env (loc_of_lexbuf env lexbuf) in
451
- env, true
452
- | '`' ->
453
- Buffer. add_char literal '`' ;
454
- env, true
455
- | "${" ->
456
- Buffer. add_string literal " ${" ;
457
- env, false
458
- | '\\' ->
459
- Buffer. add_char raw '\\' ;
460
- Buffer. add_char literal '\\' ;
461
- let env, str = string_escape env lexbuf in
462
- Buffer. add_string raw str;
463
- Buffer. add_string literal str;
464
- template_part env cooked raw literal lexbuf
465
- (* ECMAScript 6th Syntax, 11.8.6.1 Static Semantics: TV's and TRV's
466
- * Long story short, <LF> is 0xA, <CR> is 0xA, and <CR><LF> is 0xA
467
- * *)
468
- | "\r\n " ->
469
- Buffer. add_string raw " \r\n " ;
470
- Buffer. add_string literal " \r\n " ;
471
- Buffer. add_string cooked " \n " ;
472
- template_part env cooked raw literal lexbuf
473
- | "\n " | "\r " ->
474
- let lf = lexeme lexbuf in
475
- Buffer. add_string raw lf;
476
- Buffer. add_string literal lf;
477
- Buffer. add_char cooked '\n' ;
478
- template_part env cooked raw literal lexbuf
479
- (* match multi-char substrings that don't contain the start chars of the above patterns *)
480
- | Plus (Compl (eof | '`' | '$' | '\\' | '\r' | '\n' )) | any ->
481
- let c = lexeme lexbuf in
482
- Buffer. add_string raw c;
483
- Buffer. add_string literal c;
484
- Buffer. add_string cooked c;
485
- template_part env cooked raw literal lexbuf
486
- | _ -> failwith " unreachable template_part"
487
-
488
471
let token (env : Lex_env.t ) lexbuf : result =
489
472
match % sedlex lexbuf with
490
473
| line_terminator_sequence -> Continue env
@@ -519,15 +502,8 @@ let token (env : Lex_env.t) lexbuf : result =
519
502
, T_STRING (Stdlib.Utf8_string. of_string_exn (Buffer. contents buf), p2 - p1 - 1 )
520
503
)
521
504
| '`' ->
522
- let cooked = Buffer. create 127 in
523
- let raw = Buffer. create 127 in
524
- let literal = Buffer. create 127 in
525
- lexeme_to_buffer lexbuf literal;
526
- let env, is_tail = template_part env cooked raw literal lexbuf in
527
- Token
528
- ( env
529
- , T_TEMPLATE_PART (Stdlib.Utf8_string. of_string_exn (Buffer. contents raw), is_tail)
530
- )
505
+ let env = push_mode env BACKQUOTE in
506
+ Token (env, T_BACKQUOTE )
531
507
| binbigint , word ->
532
508
(* Numbers cannot be immediately followed by words *)
533
509
recover env lexbuf ~f: (fun env lexbuf ->
@@ -632,8 +608,12 @@ let token (env : Lex_env.t) lexbuf : result =
632
608
| _ -> failwith " unreachable token wholenumber" )
633
609
| wholenumber | floatnumber -> Token (env, T_NUMBER (NORMAL , lexeme lexbuf))
634
610
(* Syntax *)
635
- | "{" -> Token (env, T_LCURLY )
636
- | "}" -> Token (env, T_RCURLY )
611
+ | "{" ->
612
+ let env = push_mode env NORMAL in
613
+ Token (env, T_LCURLY )
614
+ | "}" ->
615
+ let env = pop_mode env in
616
+ Token (env, T_RCURLY )
637
617
| "(" -> Token (env, T_LPAREN )
638
618
| ")" -> Token (env, T_RPAREN )
639
619
| "[" -> Token (env, T_LBRACKET )
@@ -811,6 +791,31 @@ let regexp env lexbuf =
811
791
Token (env, T_ERROR (lexeme lexbuf))
812
792
| _ -> failwith " unreachable regexp"
813
793
794
+ (* ****************************************************************************)
795
+ (* Rule backquote *)
796
+ (* ****************************************************************************)
797
+
798
+ let backquote env lexbuf =
799
+ match % sedlex lexbuf with
800
+ | '`' ->
801
+ let env = pop_mode env in
802
+ Token (env, T_BACKQUOTE )
803
+ | "${" ->
804
+ let env = push_mode env NORMAL in
805
+ Token (env, T_DOLLARCURLY )
806
+ | Plus (Compl ('`' | '$' | '\\' )) -> Token (env, T_ENCAPSED_STRING (lexeme lexbuf))
807
+ | '$' -> Token (env, T_ENCAPSED_STRING (lexeme lexbuf))
808
+ | '\\' ->
809
+ let buf = Buffer. create 127 in
810
+ Buffer. add_char buf '\\' ;
811
+ let env, str = string_escape env lexbuf in
812
+ Buffer. add_string buf str;
813
+ Token (env, T_ENCAPSED_STRING (Buffer. contents buf))
814
+ | eof -> Token (env, T_EOF )
815
+ | _ ->
816
+ let env = illegal env (loc_of_lexbuf env lexbuf) in
817
+ Token (env, T_ERROR (lexeme lexbuf))
818
+
814
819
let wrap f =
815
820
let f env =
816
821
let start, _ = Sedlexing. lexing_positions env.Lex_env. lex_lb in
@@ -843,3 +848,11 @@ let wrap f =
843
848
let regexp = wrap regexp
844
849
845
850
let token = wrap token
851
+
852
+ let backquote = wrap backquote
853
+
854
+ let lex env =
855
+ match env.Lex_env. lex_mode_stack with
856
+ | Lex_mode. NORMAL :: _ | [] -> token env
857
+ | Lex_mode. BACKQUOTE :: _ -> backquote env
858
+ | Lex_mode. REGEXP :: _ -> regexp env
0 commit comments