Skip to content

Fix char pattern matching in unicode #5749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@

- Fix issue where async as an id cannot be used with application and labelled arguments https://github.com/rescript-lang/syntax/issues/707

- Fix 5557 the exhaustive checking for Char is incorrect during the unicode migration https://github.com/rescript-lang/rescript-compiler/pull/5749

- Fix 5753 the comment for unicode char is inaccurate https://github.com/rescript-lang/syntax/pull/709

- Internal changes: the payload of Pconst_char from char to int type safety. https://github.com/rescript-lang/syntax/pull/709

# 10.1.0-rc.2

#### :bug: Bug Fix
Expand Down Expand Up @@ -74,6 +80,7 @@
- Change the internal representation of props for the lowercase components to record. https://github.com/rescript-lang/syntax/pull/665
- Add `JsxPPXReactSupport` module to relocate the helper functions for JSX v4 from `rescript-react`


# 10.1.0-alpha.2

#### :rocket: New Feature
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/js_dump.ml
Original file line number Diff line number Diff line change
Expand Up @@ -630,7 +630,7 @@ and expression_desc cxt ~(level : int) f x : cxt =
match v with
| Float { f } -> Js_number.caml_float_literal_to_js_string f
(* attach string here for float constant folding?*)
| Int { i; c = Some c } -> Format.asprintf "/* %C */%ld" c i
| Int { i; c = Some c } -> Format.asprintf "/* %s */%ld" (Ext_util.string_of_int_as_char c) i
| Int { i; c = None } ->
Int32.to_string i
(* check , js convention with ocaml lexical convention *)
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/js_exp_make.mli
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ val method_ :

val econd : ?comment:string -> t -> t -> t -> t

val int : ?comment:string -> ?c:char -> int32 -> t
val int : ?comment:string -> ?c:int -> int32 -> t

val uint32 : ?comment:string -> int32 -> t

Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/js_of_lam_string.ml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ module E = Js_exp_make
currently, it follows the same patten of ocaml, [char] is [int]
*)

let const_char (i : char) = E.int ~c:i (Int32.of_int @@ Char.code i)
let const_char (i : int) = E.int ~c:i (Int32.of_int @@ i)

(* string [s[i]] expects to return a [ocaml_char] *)
let ref_string e e1 = E.string_index e e1
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/js_of_lam_string.mli
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,6 @@ val ref_byte : J.expression -> J.expression -> J.expression

val set_byte : J.expression -> J.expression -> J.expression -> J.expression

val const_char : char -> J.expression
val const_char : int -> J.expression

val bytes_to_string : J.expression -> J.expression
2 changes: 1 addition & 1 deletion jscomp/core/js_op.ml
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ type float_lit = { f : string } [@@unboxed]

type number =
| Float of float_lit
| Int of { i : int32; c : char option }
| Int of { i : int32; c : int option }
| Uint of int32

(* becareful when constant folding +/-,
Expand Down
4 changes: 2 additions & 2 deletions jscomp/core/lam.ml
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,7 @@ let prim ~primitive:(prim : Lam_primitive.t) ~args loc : t =
| ( (Pstringrefs | Pstringrefu),
Const_string { s = a; unicode = false },
Const_int { i = b } ) -> (
try Lift.char (String.get a (Int32.to_int b)) with _ -> default ())
try Lift.char (Char.code (String.get a (Int32.to_int b))) with _ -> default ())
| _ -> default ())
| _ -> (
match prim with
Expand Down Expand Up @@ -633,7 +633,7 @@ let rec complete_range (sw_consts : (int * _) list) ~(start : int) ~finish =
let rec eval_const_as_bool (v : Lam_constant.t) : bool =
match v with
| Const_int { i = x } -> x <> 0l
| Const_char x -> Char.code x <> 0
| Const_char x -> x <> 0
| Const_int64 x -> x <> 0L
| Const_js_false | Const_js_null | Const_module_alias | Const_js_undefined ->
false
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/lam_constant.ml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ type t =
| Const_js_true
| Const_js_false
| Const_int of { i : int32; comment : pointer_info }
| Const_char of char
| Const_char of int
| Const_string of { s : string; unicode : bool }
| Const_float of string
| Const_int64 of int64
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/lam_constant.mli
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ type t =
| Const_js_true
| Const_js_false
| Const_int of { i : int32; comment : pointer_info }
| Const_char of char
| Const_char of int
| Const_string of { s : string; unicode : bool }
| Const_float of string
| Const_int64 of int64
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/lam_pass_lets_dce.ml
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ let lets_helper (count_var : Ident.t -> Lam_pass_count.used_info) lam : Lam.t =
|Lconst((Const_int {i})) ->
let i = Int32.to_int i in
if i < String.length l_s && i >= 0 then
Lam.const ((Const_char l_s.[i]))
Lam.const ((Const_char (Char.code l_s.[i])))
else
Lam.prim ~primitive ~args:[l';r'] loc
| _ ->
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/lam_pass_lets_dce.pp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ let lets_helper (count_var : Ident.t -> Lam_pass_count.used_info) lam : Lam.t =
|Lconst((Const_int {i})) ->
let i = Int32.to_int i in
if i < String.length l_s && i >= 0 then
Lam.const ((Const_char l_s.[i]))
Lam.const ((Const_char (Char.code l_s.[i])))
else
Lam.prim ~primitive ~args:[l';r'] loc
| _ ->
Expand Down
2 changes: 1 addition & 1 deletion jscomp/core/lam_print.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ let rec struct_const ppf (cst : Lam_constant.t) =
| Const_module_alias -> fprintf ppf "#alias"
| Const_js_undefined -> fprintf ppf "#undefined"
| Const_int { i } -> fprintf ppf "%ld" i
| Const_char c -> fprintf ppf "%C" c
| Const_char i -> fprintf ppf "%s" (Ext_util.string_of_int_as_char i)
| Const_string { s } -> fprintf ppf "%S" s
| Const_float f -> fprintf ppf "%s" f
| Const_int64 n -> fprintf ppf "%LiL" n
Expand Down
37 changes: 37 additions & 0 deletions jscomp/ext/ext_utf8.ml
Original file line number Diff line number Diff line change
Expand Up @@ -92,3 +92,40 @@ let decode_utf8_string s =

(* let verify s loc =
assert false *)

let encode_codepoint c =
(* reused from syntax/src/res_utf8.ml *)
let h2 = 0b1100_0000 in
let h3 = 0b1110_0000 in
let h4 = 0b1111_0000 in
let cont_mask = 0b0011_1111 in
if c <= 127 then (
let bytes = (Bytes.create [@doesNotRaise]) 1 in
Bytes.unsafe_set bytes 0 (Char.unsafe_chr c);
Bytes.unsafe_to_string bytes)
else if c <= 2047 then (
let bytes = (Bytes.create [@doesNotRaise]) 2 in
Bytes.unsafe_set bytes 0 (Char.unsafe_chr (h2 lor (c lsr 6)));
Bytes.unsafe_set bytes 1
(Char.unsafe_chr (0b1000_0000 lor (c land cont_mask)));
Bytes.unsafe_to_string bytes)
else if c <= 65535 then (
let bytes = (Bytes.create [@doesNotRaise]) 3 in
Bytes.unsafe_set bytes 0 (Char.unsafe_chr (h3 lor (c lsr 12)));
Bytes.unsafe_set bytes 1
(Char.unsafe_chr (0b1000_0000 lor ((c lsr 6) land cont_mask)));
Bytes.unsafe_set bytes 2
(Char.unsafe_chr (0b1000_0000 lor (c land cont_mask)));
Bytes.unsafe_to_string bytes)
else
(* if c <= max then *)
let bytes = (Bytes.create [@doesNotRaise]) 4 in
Bytes.unsafe_set bytes 0 (Char.unsafe_chr (h4 lor (c lsr 18)));
Bytes.unsafe_set bytes 1
(Char.unsafe_chr (0b1000_0000 lor ((c lsr 12) land cont_mask)));
Bytes.unsafe_set bytes 2
(Char.unsafe_chr (0b1000_0000 lor ((c lsr 6) land cont_mask)));
Bytes.unsafe_set bytes 3
(Char.unsafe_chr (0b1000_0000 lor (c land cont_mask)));
Bytes.unsafe_to_string bytes

2 changes: 2 additions & 0 deletions jscomp/ext/ext_utf8.mli
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,5 @@ val next : string -> remaining:int -> int -> int
exception Invalid_utf8 of string

val decode_utf8_string : string -> int list

val encode_codepoint : int -> string
19 changes: 19 additions & 0 deletions jscomp/ext/ext_util.ml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,22 @@ let stats_to_string
num_buckets max_bucket_length
(String.concat ","
(Array.to_list (Array.map string_of_int bucket_histogram)))

let string_of_int_as_char (i : int) : string =
if i <= 255 && i >= 0 then Format.asprintf "%C" (Char.unsafe_chr i)
else
let str =
match Char.unsafe_chr i with
| '\'' -> "\\'"
| '\\' -> "\\\\"
| '\n' -> "\\n"
| '\t' -> "\\t"
| '\r' -> "\\r"
| '\b' -> "\\b"
| ' ' .. '~' as c ->
let s = (Bytes.create [@doesNotRaise]) 1 in
Bytes.unsafe_set s 0 c;
Bytes.unsafe_to_string s
| _ -> Ext_utf8.encode_codepoint i
in
Printf.sprintf "\'%s\'" str
3 changes: 3 additions & 0 deletions jscomp/ext/ext_util.mli
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@
val power_2_above : int -> int -> int

val stats_to_string : Hashtbl.statistics -> string

val string_of_int_as_char : int -> string

2 changes: 1 addition & 1 deletion jscomp/ml/ast_helper.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ module Const = struct
let int64 ?(suffix='L') i = integer ~suffix (Int64.to_string i)
let nativeint ?(suffix='n') i = integer ~suffix (Nativeint.to_string i)
let float ?suffix f = Pconst_float (f, suffix)
let char c = Pconst_char c
let char c = Pconst_char (Char.code c)
let string ?quotation_delimiter s = Pconst_string (s, quotation_delimiter)
end

Expand Down
4 changes: 2 additions & 2 deletions jscomp/ml/asttypes.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

type constant =
Const_int of int
| Const_char of char
| Const_char of int
| Const_string of string * string option
| Const_float of string
| Const_int32 of int32
Expand Down Expand Up @@ -70,4 +70,4 @@ let same_arg_label (x : arg_label) y =
begin match y with
| Optional s0 -> s = s0
| _ -> false
end
end
2 changes: 1 addition & 1 deletion jscomp/ml/matching.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2202,7 +2202,7 @@ let combine_constant names loc arg cst partial ctx def
call_switcher loc fail arg min_int max_int int_lambda_list names
| Const_char _ ->
let int_lambda_list =
List.map (function Const_char c, l -> (Char.code c, l)
List.map (function Const_char c, l -> (c, l)
| _ -> assert false)
const_lambda_list in
call_switcher loc fail arg 0 max_int int_lambda_list names
Expand Down
14 changes: 10 additions & 4 deletions jscomp/ml/parmatch.ml
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ let is_cons = function

let pretty_const c = match c with
| Const_int i -> Printf.sprintf "%d" i
| Const_char c -> Printf.sprintf "%C" c
| Const_char i -> Printf.sprintf "%s" (Pprintast.string_of_int_as_char i)
| Const_string (s, _) -> Printf.sprintf "%S" s
| Const_float f -> Printf.sprintf "%s" f
| Const_int32 i -> Printf.sprintf "%ldl" i
Expand Down Expand Up @@ -1037,7 +1037,7 @@ let build_other_constant proj make first next p env =

let some_other_tag = "<some other tag>"

let build_other ext env = match env with
let build_other ext env : Typedtree.pattern = match env with
| ({pat_desc = Tpat_construct (lid, {cstr_tag=Cstr_extension _},_)},_) :: _ ->
(* let c = {c with cstr_name = "*extension*"} in *) (* PR#7330 *)
make_pat (Tpat_var (Ident.create "*extension*",
Expand Down Expand Up @@ -1079,13 +1079,19 @@ let build_other ext env = match env with
make_pat (Tpat_or (pat, p_res, None)) p.pat_type p.pat_env)
pat other_pats
end
| ({pat_desc=(Tpat_constant (Const_int _ | Const_char _))} as p,_) :: _ ->
| ({pat_desc=(Tpat_constant (Const_int _ ))} as p,_) :: _ ->
build_other_constant
(function Tpat_constant(Const_int i) -> i
| Tpat_constant (Const_char i) -> Char.code i
| _ -> assert false)
(function i -> Tpat_constant(Const_int i))
0 succ p env
| ({pat_desc=(Tpat_constant (Const_char _ ))} as p,_) :: _ ->
build_other_constant
(function
| Tpat_constant (Const_char i) -> i
| _ -> assert false)
(function i -> Tpat_constant(Const_char i))
0 succ p env
| ({pat_desc=(Tpat_constant (Const_int32 _))} as p,_) :: _ ->
build_other_constant
(function Tpat_constant(Const_int32 i) -> i | _ -> assert false)
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -11015,7 +11015,7 @@ let yyact = [|
let _1 = (Parsing.peek_val __caml_parser_env 0 : char) in
Obj.repr(
# 2155 "ml/parser.mly"
( Pconst_char _1 )
( Pconst_char (Char.code _1) )
# 11020 "ml/parser.ml"
: 'constant))
; (fun __caml_parser_env ->
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/parser.mly
Original file line number Diff line number Diff line change
Expand Up @@ -2152,7 +2152,7 @@ label:

constant:
| INT { let (n, m) = $1 in Pconst_integer (n, m) }
| CHAR { Pconst_char $1 }
| CHAR { Pconst_char (Char.code $1) }
| STRING { let (s, d) = $1 in Pconst_string (s, d) }
| FLOAT { let (f, m) = $1 in Pconst_float (f, m) }
;
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/parsetree.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ type constant =
Suffixes [g-z][G-Z] are accepted by the parser.
Suffixes except 'l', 'L' and 'n' are rejected by the typechecker
*)
| Pconst_char of char
| Pconst_char of int
(* 'c' *)
| Pconst_string of string * string option
(* "constant"
Expand Down
8 changes: 5 additions & 3 deletions jscomp/ml/pprintast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,10 @@ let rec longident f = function

let longident_loc f x = pp f "%a" longident x.txt

let string_of_int_as_char i = Ext_util.string_of_int_as_char i

let constant f = function
| Pconst_char i -> pp f "%C" i
| Pconst_char i -> pp f "%s" (string_of_int_as_char i)
| Pconst_string (i, None) -> pp f "%S" i
| Pconst_string (i, Some delim) -> pp f "{%s|%s|%s}" delim i delim
| Pconst_integer (i, None) -> paren (i.[0]='-') (fun f -> pp f "%s") f i
Expand Down Expand Up @@ -770,7 +772,7 @@ and value_description ctxt f x =
pp f "@[<hov2>%a%a@]" (core_type ctxt) x.pval_type
(fun f x ->

# 772 "ml/pprintast.pp.ml"
# 774 "ml/pprintast.pp.ml"
match x.pval_prim with
| first :: second :: _
when Ext_string.first_marshal_char second
Expand All @@ -783,7 +785,7 @@ and value_description ctxt f x =
pp f "@ =@ %a" (list constant_string) x.pval_prim


# 787 "ml/pprintast.pp.ml"
# 789 "ml/pprintast.pp.ml"
) x

and extension ctxt f (s, e) =
Expand Down
1 change: 1 addition & 0 deletions jscomp/ml/pprintast.mli
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,4 @@ val pattern: Format.formatter -> Parsetree.pattern -> unit
val signature: Format.formatter -> Parsetree.signature -> unit
val structure: Format.formatter -> Parsetree.structure -> unit
val string_of_structure: Parsetree.structure -> string
val string_of_int_as_char: int -> string
4 changes: 3 additions & 1 deletion jscomp/ml/pprintast.pp.ml
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,10 @@ let rec longident f = function

let longident_loc f x = pp f "%a" longident x.txt

let string_of_int_as_char i = Ext_util.string_of_int_as_char i

let constant f = function
| Pconst_char i -> pp f "%C" i
| Pconst_char i -> pp f "%s" (string_of_int_as_char i)
| Pconst_string (i, None) -> pp f "%S" i
| Pconst_string (i, Some delim) -> pp f "{%s|%s|%s}" delim i delim
| Pconst_integer (i, None) -> paren (i.[0]='-') (fun f -> pp f "%s") f i
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/printast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ let fmt_char_option f = function
let fmt_constant f x =
match x with
| Pconst_integer (i,m) -> fprintf f "PConst_int (%s,%a)" i fmt_char_option m;
| Pconst_char (c) -> fprintf f "PConst_char %02x" (Char.code c);
| Pconst_char (c) -> fprintf f "PConst_char %02x" c;
| Pconst_string (s, None) -> fprintf f "PConst_string(%S,None)" s;
| Pconst_string (s, Some delim) ->
fprintf f "PConst_string (%S,Some %S)" s delim;
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/printlambda.ml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ open Lambda

let rec struct_const ppf = function
| Const_base(Const_int n) -> fprintf ppf "%i" n
| Const_base(Const_char c) -> fprintf ppf "%C" c
| Const_base(Const_char i) -> fprintf ppf "%s" (Pprintast.string_of_int_as_char i)
| Const_base(Const_string (s, _)) -> fprintf ppf "%S" s
| Const_immstring s -> fprintf ppf "#%S" s
| Const_base(Const_float f) -> fprintf ppf "%s" f
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/printtyped.ml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ let fmt_path f x = fprintf f "\"%a\"" fmt_path_aux x;;
let fmt_constant f x =
match x with
| Const_int (i) -> fprintf f "Const_int %d" i;
| Const_char (c) -> fprintf f "Const_char %02x" (Char.code c);
| Const_char (c) -> fprintf f "Const_char %02x" c;
| Const_string (s, None) -> fprintf f "Const_string(%S,None)" s;
| Const_string (s, Some delim) ->
fprintf f "Const_string (%S,Some %S)" s delim;
Expand Down
2 changes: 1 addition & 1 deletion jscomp/ml/typecore.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1009,7 +1009,7 @@ and type_pat_aux ~constrs ~labels ~no_existentials ~mode ~explode ~env
else
or_ ~loc:gloc
(constant ~loc:gloc (Pconst_char c1))
(loop (Char.chr(Char.code c1 + 1)) c2)
(loop (c1 + 1) c2)
in
let p = if c1 <= c2 then loop c1 c2 else loop c2 c1 in
let p = {p with ppat_loc=loc} in
Expand Down
1 change: 1 addition & 0 deletions jscomp/napkin/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#### :nail_care Polish

- Change the internal representation of props for the lowercase components to record. https://github.com/rescript-lang/syntax/pull/665
- Change the payload of Pconst_char for type safety. https://github.com/rescript-lang/rescript-compiler/pull/5759

## ReScript 10.0

Expand Down
Loading