Skip to content

Commit debb635

Browse files
authored
Merge pull request #412 from AltGr/fix-utf8
Fix the display of utf8 characters in the code editor
2 parents 90066d0 + e0fa640 commit debb635

File tree

6 files changed

+51
-11
lines changed

6 files changed

+51
-11
lines changed

src/ace-lib/ace.ml

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,6 @@ let read_range range =
4747
((range##.start##.row, range##.start##.column),
4848
(range##.end_##.row, range##.end_##.column))
4949

50-
let get_contents ?range {editor} =
51-
let document = (editor##getSession)##getDocument in
52-
match range with
53-
| None ->
54-
Js.to_string @@ document##getValue
55-
| Some r ->
56-
Js.to_string @@ document##(getTextRange r)
57-
5850
let set_contents ?(reset_undo=false) {editor} code =
5951
let session = editor##getSession in
6052
session##getDocument##setValue (Js.string code);
@@ -71,6 +63,20 @@ let get_line {editor} line =
7163
let document = (editor##getSession)##getDocument in
7264
Js.to_string @@ document##(getLine line)
7365

66+
let get_contents ?range e =
67+
let document = (e.editor##getSession)##getDocument in
68+
match range with
69+
| None ->
70+
Js.to_string @@ document##getValue
71+
| Some r ->
72+
(* Bytes range to utf8 string range conversion *)
73+
let (r1,c1), (r2, c2) = read_range r in
74+
let l1, l2 = get_line e r1, get_line e r2 in
75+
let c1 = Js_utils.pos8_to_pos16 l1 c1 in
76+
let c2 = Js_utils.pos8_to_pos16 l2 c2 in
77+
let r = create_range (create_position r1 c1) (create_position r2 c2) in
78+
Js.to_string @@ document##(getTextRange r)
79+
7480
let create_editor editor_div =
7581
let editor = edit editor_div in
7682
Js.Unsafe.set editor "$blockScrolling" (Js.Unsafe.variable "Infinity");
@@ -122,7 +128,14 @@ let set_mark editor ?loc ?(type_ = Message) msg =
122128
| Some { loc_start = (sr, sc) ; loc_end = (er, ec) } ->
123129
let sr = sr - 1 in
124130
let er = er - 1 in
131+
(* Corrects column positions for unicode strings *)
132+
let sline = get_line editor sr in
133+
let eline = get_line editor er in
134+
let sc = Js_utils.pos8_to_pos16 sline sc in
135+
let ec = Js_utils.pos8_to_pos16 ~stop_before:false eline ec in
136+
(* end position corrections *)
125137
sr, sc, Some (range sr sc er ec) in
138+
126139
let annot : annotation Js.t = Js.Unsafe.obj [||] in
127140
annot##.row := sr;
128141
annot##.column := sc;
@@ -203,8 +216,10 @@ type token = Ace_types.token Js.t
203216
let token ~type_ value =
204217
let obj : Ace_types.token Js.t = Js.Unsafe.obj [||] in
205218
obj##.value := Js.string value;
219+
obj##._val := value;
206220
obj##._type := Js.string type_;
207221
obj
222+
let get_token_val token = token##._val
208223

209224
type doc = Ace_types.document Js.t
210225

src/ace-lib/ace.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ val set_custom_data: 'a editor -> 'a -> unit
8080

8181
type token
8282
val token: type_:string -> string -> token
83+
val get_token_val: token -> string
8384

8485
type 'state helpers = {
8586
initial_state: unit -> 'state;

src/ace-lib/ace_types.mli

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ open Js_of_ocaml
1010

1111
class type token = object
1212
method value : Js.js_string Js.t Js.prop
13+
method _val : string Js.prop
1314
method _type : Js.js_string Js.t Js.prop
1415
end
1516

src/ace-lib/ocaml_mode.ml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,11 +298,18 @@ let get_line_tokens line st row doc =
298298
if !debug_indent > 1 && tok.token <> EOL && tok.token <> ESCAPED_EOL then
299299
IndentBlock.dump block;
300300
let st = { block; lex_ctxt; } in
301-
match tok.token with
302-
| EOL | ESCAPED_EOL ->
301+
let type_ = token_type tok.token in
302+
match tok.token, tokens with
303+
| ILLEGAL_CHAR c, t::toks ->
304+
let t = Ace.token ~type_ ((Ace.get_token_val t)^(String.make 1 c)) in
305+
iter st offset stream (t :: toks)
306+
| STRING_CONTENT, t::toks ->
307+
let t = Ace.token ~type_ ((Ace.get_token_val t)^tok.between^tok.substr) in
308+
iter st offset stream (t :: toks)
309+
| EOL, _ | ESCAPED_EOL, _ ->
303310
(* FIXME some spaces ??? *)
304311
(st, List.rev tokens)
305-
| COMMENT_OPEN_EOL ->
312+
| COMMENT_OPEN_EOL, _ ->
306313
(st, List.rev (comment_open tok.between :: tokens))
307314
| _ ->
308315
iter st offset stream (wrap_token st tok :: tokens)

src/utils/js_utils.ml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,20 @@ let js_debug obj = Firebug.console##(debug obj)
3131
let js_warn obj = Firebug.console##(warn obj)
3232
let js_error obj = Firebug.console##(error obj)
3333

34+
let rec pos8_to_pos16 line c8 i8 i16 stop_before =
35+
if i8 >= c8 || i8 >= String.length line then i16 else
36+
let di8, di16 = match line.[i8] with
37+
| '\x00' .. '\x7F' -> 1, 1
38+
| '\xC2' .. '\xDF' -> 2, 1
39+
| '\xE0' .. '\xEF' -> 3, 1
40+
| '\xF0' .. '\xF4' -> 4, 2
41+
| _ -> 1, 1 in
42+
if stop_before && i8 <= c8 && c8 < i8 + di8 then i16
43+
else pos8_to_pos16 line c8 (i8+di8) (i16+di16) stop_before
44+
45+
let pos8_to_pos16 ?(stop_before = true) line c8 =
46+
pos8_to_pos16 line c8 0 0 stop_before
47+
3448
let log fmt =
3549
Format.kfprintf
3650
(fun _ -> Firebug.console##(log (Js.string (Format.flush_str_formatter ()))))

src/utils/js_utils.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ val js_debug: 'a -> unit
3232
val js_warn: 'a -> unit
3333
val js_error: 'a -> unit
3434

35+
val pos8_to_pos16: ?stop_before:bool -> string -> int -> int
36+
3537
val reload: unit -> unit
3638

3739
(** Gets the language configured in the browser *)

0 commit comments

Comments
 (0)