Bigstring optimizations by vouillon · Pull Request #2144 · ocsigen/js_of_ocaml

vouillon · 2026-01-20T12:38:26Z

No description provided.

Read several characters at a time.

…engines. Importing this function via Function.prototype.call.bind(DataView.prototype.getInt32) is optimized in V8, but in other browsers it is much slower than calling it through a JavaScript wrapper function.

adrien-n · 2026-01-20T15:41:04Z

Hi and thanks a lot for your work.

Since my chromium is borked on my laptop, I did my tests on a raspberry pi 4.

| | Chromium | Firefox |
|---|---|---|
| WASM baseline | 20s | 67s |
| WASM this PR | 13s | 18s |
| JS | 5s | 13s |

This PR results in a very large speedup as you can see.

I've profiled the worker in both firefox and chromium. The functions called change a lot and I'm not seeing references to functions under JS:: in firefox, only to wasm-function.

On my laptop, the runtime gets pretty close to the runtime of the JS version (+20% or so compared to +400% before). Thanks again!

hhugo · 2026-02-02T11:43:46Z

@vouillon, what's the status of https://github.com/vouillon/wax? Can it be used to review this change?

vouillon · 2026-02-02T13:04:23Z

I think it is usable if you want to give it a try. It does not understand conditional directives (`@if`) yet, but `bigstring.wat` does not contain any. It is also stricter than Binaryen, so running `wax ~/js_of_ocaml/runtime/wasm/bigstring.wat -v -f wax` reports an error unless you first fix these two lines:

--- a/runtime/wasm/bigstring.wat
+++ b/runtime/wasm/bigstring.wat
@@ -154,7 +154,8 @@
                (local.set $i (i32.add (local.get $i) (i32.const 1)))
                (br_if $loop (i32.eq (local.get $c1) (local.get $c2)))
                (return
-                  (select (ref.i31 (i32.const -1)) (ref.i31 (i32.const 1))
+                  (select (result (ref eq))
+                     (ref.i31 (i32.const -1)) (ref.i31 (i32.const 1))
                      (i32.lt_u (local.get $c1) (local.get $c2)))))))
       (ref.i31 (i32.const 0)))
 
@@ -183,7 +184,8 @@
                (local.set $i (i32.add (local.get $i) (i32.const 1)))
                (br_if $loop (i32.eq (local.get $c1) (local.get $c2)))
                (return
-                  (select (ref.i31 (i32.const -1)) (ref.i31 (i32.const 1))
+                  (select (result (ref eq))
+                     (ref.i31 (i32.const -1)) (ref.i31 (i32.const 1))
                      (i32.lt_u (local.get $c1) (local.get $c2)))))))
       (ref.i31 (i32.const 0)))

hhugo · 2026-02-15T20:51:28Z

Here is the diff after wax

$ patdiff before.rst after.rst 
------ before.rst
++++++ after.rst
@|-86,132 +86,278 ============================================================
 |}
 |#[export = "caml_bigstring_memset"]
 |fn caml_bigstring_memset(s: &eq, pos: &eq, len: &eq, v: &eq) -> &eq {
 |    become caml_ba_fill(caml_ba_sub(s, pos, len), v);
 |}
 |#[export = "caml_bigstring_memcmp"]
 |fn caml_bigstring_memcmp(s1: &eq, vpos1: &eq, s2: &eq, vpos2: &eq, vlen: &eq)
 |-> &eq {
 |    let i: i32;
 |    let pos1: i32;
 |    let pos2: i32;
 |    let len: i32;
 |    let c1: i32;
 |    let c2: i32;
 |    let v1: &extern;
 |    let v2: &extern;
+|    let w1: i32;
+|    let w2: i32;
+|    let xored: i32;
 |    v1 = caml_ba_get_view(s1);
 |    pos1 = vpos1 as &i31 as i32_s;
 |    v2 = caml_ba_get_view(s2);
 |    pos2 = vpos2 as &i31 as i32_s;
 |    len = vlen as &i31 as i32_s;
+|    'done: do {
+|        'loop: loop {
+|            br_if 'done i + 4 >u len;
+|            w1 = dv_get_i32_unaligned(v1, pos1 + i, 1);
+|            w2 = dv_get_i32_unaligned(v2, pos2 + i, 1);
+|            if w1 == w2 { i = i + 4; br 'loop; }
+|            xored = w1 ^ w2;
+|            if xored & 0xFF {
+|                c1 = w1 & 0xFF;
+|                c2 = w2 & 0xFF;
+|                return c1 <u c2?-1 as &i31:1 as &i31;
+|            }
+|            if xored & 0xFF00 {
+|                c1 = w1 >>u 8 & 0xFF;
+|                c2 = w2 >>u 8 & 0xFF;
+|                return c1 <u c2?-1 as &i31:1 as &i31;
+|            }
+|            if xored & 0xFF0000 {
+|                c1 = w1 >>u 16 & 0xFF;
+|                c2 = w2 >>u 16 & 0xFF;
+|                return c1 <u c2?-1 as &i31:1 as &i31;
+|            }
+|            c1 = w1 >>u 24;
+|            c2 = w2 >>u 24;
+|            return c1 <u c2?-1 as &i31:1 as &i31;
+|        } /* 'loop */
+|    } /* 'done */
 |    'loop: loop {
 |        if i <u len {
 |            c1 = dv_get_ui8(v1, pos1 + i);
 |            c2 = dv_get_ui8(v2, pos2 + i);
 |            i = i + 1;
 |            br_if 'loop c1 == c2;
 |            return c1 <u c2?-1 as &i31:1 as &i31;
 |        }
 |    } /* 'loop */
 |    0 as &i31;
 |}
 |#[export = "caml_bigstring_memcmp_string"]
 |fn caml_bigstring_memcmp_string
 |(s1: &eq, vpos1: &eq, vs2: &eq, vpos2: &eq, vlen: &eq) -> &eq {
 |    let i: i32;
 |    let pos1: i32;
 |    let pos2: i32;
 |    let len: i32;
 |    let c1: i32;
 |    let c2: i32;
 |    let v1: &extern;
 |    let s2: &bytes;
+|    let w1: i32;
+|    let w2: i32;
+|    let xored: i32;
+|    let j: i32;
 |    v1 = caml_ba_get_view(s1);
 |    pos1 = vpos1 as &i31 as i32_s;
 |    s2 = vs2 as &bytes;
 |    pos2 = vpos2 as &i31 as i32_s;
 |    len = vlen as &i31 as i32_s;
+|    'done: do {
+|        'loop: loop {
+|            br_if 'done i + 4 >u len;
+|            w1 = dv_get_i32_unaligned(v1, pos1 + i, 1);
+|            j = pos2 + i;
+|            w2 =
+|                s2[j] as i32_u | s2[j + 1] as i32_u << 8 |
+|                    (s2[j + 2] as i32_u << 16 | s2[j + 3] as i32_u << 24);
+|            if w1 == w2 { i = i + 4; br 'loop; }
+|            xored = w1 ^ w2;
+|            if xored & 0xFF {
+|                c1 = w1 & 0xFF;
+|                c2 = w2 & 0xFF;
+|                return c1 <u c2?-1 as &i31:1 as &i31;
+|            }
+|            if xored & 0xFF00 {
+|                c1 = w1 >>u 8 & 0xFF;
+|                c2 = w2 >>u 8 & 0xFF;
+|                return c1 <u c2?-1 as &i31:1 as &i31;
+|            }
+|            if xored & 0xFF0000 {
+|                c1 = w1 >>u 16 & 0xFF;
+|                c2 = w2 >>u 16 & 0xFF;
+|                return c1 <u c2?-1 as &i31:1 as &i31;
+|            }
+|            c1 = w1 >>u 24;
+|            c2 = w2 >>u 24;
+|            return c1 <u c2?-1 as &i31:1 as &i31;
+|        } /* 'loop */
+|    } /* 'done */
 |    'loop: loop {
 |        if i <u len {
 |            c1 = dv_get_ui8(v1, pos1 + i);
 |            c2 = s2[pos2 + i] as i32_u;
 |            i = i + 1;
 |            br_if 'loop c1 == c2;
 |            return c1 <u c2?-1 as &i31:1 as &i31;
 |        }
 |    } /* 'loop */
 |    0 as &i31;
 |}
 |#[export = "caml_bigstring_memchr"]
 |fn caml_bigstring_memchr(s: &eq, vc: &eq, vpos: &eq, vlen: &eq) -> &eq {
 |    let pos: i32;
 |    let len: i32;
 |    let c: i32;
 |    let v: &extern;
+|    let mask: i32;
+|    let word: i32;
+|    let xored: i32;
 |    c = vc as &i31 as i32_s;
 |    pos = vpos as &i31 as i32_s;
 |    len = vlen as &i31 as i32_s;
 |    v = caml_ba_get_view(s);
+|    mask = c * 0x01010101;
+|    'done: do {
+|        'loop: loop {
+|            br_if 'done len <s 4;
+|            word = dv_get_i32_unaligned(v, pos, 1);
+|            xored = word ^ mask;
+|            if xored - 0x01010101 & (xored ^ -1) & 0x80808080 {
+|                if !(xored & 0xFF) { return pos as &i31; }
+|                if !(xored & 0xFF00) { return (pos + 1) as &i31; }
+|                if !(xored & 0xFF0000) { return (pos + 2) as &i31; }
+|                return (pos + 3) as &i31;
+|            }
+|            pos = pos + 4;
+|            len = len - 4;
+|            br 'loop;
+|        } /* 'loop */
+|    } /* 'done */
 |    'loop: loop {
 |        if len >s 0 {
 |            if c == dv_get_ui8(v, pos) { return pos as &i31; }
 |            len = len - 1;
 |            pos = pos + 1;
 |            br 'loop;
 |        }
 |    } /* 'loop */
 |    -1 as &i31;
 |}
 |#[export = "caml_bigstring_memrchr"]
 |fn caml_bigstring_memrchr(s: &eq, vc: &eq, vpos: &eq, vlen: &eq) -> &eq {
 |    let pos: i32;
 |    let len: i32;
 |    let c: i32;
 |    let cur: i32;
 |    let v: &extern;
+|    let mask: i32;
+|    let word: i32;
+|    let xored: i32;
 |    c = vc as &i31 as i32_s;
 |    pos = vpos as &i31 as i32_s;
 |    len = vlen as &i31 as i32_s;
 |    v = caml_ba_get_view(s);
 |    cur = pos + len - 1;
+|    mask = c * 0x01010101;
+|    'loop: loop {
+|        if cur - pos >=s 3 {
+|            word = dv_get_i32_unaligned(v, cur - 3, 1);
+|            xored = word ^ mask;
+|            if xored - 0x01010101 & (xored ^ -1) & 0x80808080 {
+|                if !(xored & 0xFF000000) { return cur as &i31; }
+|                if !(xored & 0xFF0000) { return (cur - 1) as &i31; }
+|                if !(xored & 0xFF00) { return (cur - 2) as &i31; }
+|                return (cur - 3) as &i31;
+|            }
+|            cur = cur - 4;
+|            br 'loop;
+|        }
+|    } /* 'loop */
 |    'loop: loop {
 |        if cur >=s pos {
 |            if c == dv_get_ui8(v, cur) { return cur as &i31; }
 |            cur = cur - 1;
 |            br 'loop;
 |        }
 |    } /* 'loop */
 |    -1 as &i31;
 |}
 |#[export = "caml_bigstring_strncmp"]
 |fn caml_bigstring_strncmp
 |(vs1: &eq, vpos1: &eq, vs2: &eq, vpos2: &eq, vlen: &eq) -> &eq {
 |    let v1: &extern;
 |    let v2: &extern;
 |    let pos1: i32;
 |    let pos2: i32;
 |    let len: i32;
 |    let i: i32;
 |    let c1: i32;
 |    let c2: i32;
+|    let w1: i32;
+|    let w2: i32;
+|    let xored: i32;
 |    v1 = caml_ba_get_view(vs1);
 |    v2 = caml_ba_get_view(vs2);
 |    pos1 = vpos1 as &i31 as i32_s;
 |    pos2 = vpos2 as &i31 as i32_s;
 |    len = vlen as &i31 as i32_s;
+|    'done: do {
+|        'loop: loop {
+|            br_if 'done i + 4 >u len;
+|            w1 = dv_get_i32_unaligned(v1, pos1 + i, 1);
+|            w2 = dv_get_i32_unaligned(v2, pos2 + i, 1);
+|            if w1 == w2 {
+|                if w1 - 0x01010101 & (w1 ^ -1) & 0x80808080 {
+|                    return 0 as &i31;
+|                }
+|                i = i + 4;
+|                br 'loop;
+|            }
+|            xored = w1 ^ w2;
+|            c1 = w1 & 0xFF;
+|            c2 = w2 & 0xFF;
+|            if xored & 0xFF | !c1 {
+|                if c1 <u c2 { return -1 as &i31; }
+|                if c1 >u c2 { return 1 as &i31; }
+|                return 0 as &i31;
+|            }
+|            c1 = w1 >>u 8 & 0xFF;
+|            c2 = w2 >>u 8 & 0xFF;
+|            if xored & 0xFF00 | !c1 {
+|                if c1 <u c2 { return -1 as &i31; }
+|                if c1 >u c2 { return 1 as &i31; }
+|                return 0 as &i31;
+|            }
+|            c1 = w1 >>u 16 & 0xFF;
+|            c2 = w2 >>u 16 & 0xFF;
+|            if xored & 0xFF0000 | !c1 {
+|                if c1 <u c2 { return -1 as &i31; }
+|                if c1 >u c2 { return 1 as &i31; }
+|                return 0 as &i31;
+|            }
+|            c1 = w1 >>u 24;
+|            c2 = w2 >>u 24;
+|            if c1 <u c2 { return -1 as &i31; }
+|            if c1 >u c2 { return 1 as &i31; }
+|            return 0 as &i31;
+|        } /* 'loop */
+|    } /* 'done */
 |    'loop: loop {
 |        if i <u len {
 |            c1 = dv_get_ui8(v1, pos1 + i);
 |            c2 = dv_get_ui8(v2, pos2 + i);
 |            i = i + 1;
 |            if c1 <u c2 { return -1 as &i31; }
 |            if c1 >u c2 { return 1 as &i31; }
 |            if c1 == 0 { return 0 as &i31; }
 |            br 'loop;
 |        }
 |    } /* 'loop */
 |    0 as &i31;
 |}
 |#[export = "caml_bigstring_blit_bytes_to_ba"]
 |#[export = "caml_bigstring_blit_string_to_ba"]
 |fn caml_bigstring_blit_bytes_to_ba

hhugo · 2026-02-15T21:18:56Z

runtime/wasm/runtime.js


  const on_windows = isNode && globalThis.process.platform === "win32";

+  const isV8 = new Error().stack?.includes("\n    at ") ?? false;


Can you add a source for this trick? Should we add a test somewhere to spot if it ever changes?

hhugo · 2026-02-15T21:25:19Z

runtime/wasm/runtime.js

    dv_get_f32: call.bind(DV.getFloat32),
    dv_get_i64: call.bind(DV.getBigInt64),
-    dv_get_i32: call.bind(DV.getInt32),
+    dv_get_i32: isV8 ? call.bind(DV.getInt32) : (x, y, z) => x.getInt32(y, z),


We need a comment in the code explaining this, together with a date or the engine versions for which this was true. And maybe a tiny benchmark that shows the difference between the two, so that one can check that the optimization is still accurate.

Wasm runtime: optimized some bigstring primitives

eab3e52

Read several characters at a time.

vouillon added the wasm label Jan 20, 2026

vouillon added 2 commits January 20, 2026 13:57

Wasm runtime: optimize call to Dataview.prototype.getInt32 on non-V8 …

8ef5381

…engines. Importing this function via Function.prototype.call.bind(DataView.prototype.getInt32) is optimized in V8, but in other browsers it is much slower than calling it through a JavaScript wrapper function.

Changes

7d062ca

vouillon force-pushed the bigstring branch from 3c4cb4b to 7d062ca Compare January 20, 2026 13:24

vouillon mentioned this pull request Jan 20, 2026

Wasm_of_ocaml runtime file inhabitedtype/bigstringaf#57

Merged

vouillon marked this pull request as ready for review January 20, 2026 16:19

hhugo mentioned this pull request Jan 30, 2026

Misc: prepare for 6.3 release #2152

Merged

9 tasks

hhugo reviewed Feb 15, 2026

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Comments

Bigstring optimizations#2144

Bigstring optimizations#2144
vouillon wants to merge 3 commits into master from bigstring

vouillon commented Jan 20, 2026

Uh oh!

adrien-n commented Jan 20, 2026

Uh oh!

hhugo commented Feb 2, 2026

Uh oh!

vouillon commented Feb 2, 2026

Uh oh!

hhugo commented Feb 15, 2026

Uh oh!

hhugo Feb 15, 2026

Uh oh!

hhugo Feb 15, 2026

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants


		const on_windows = isNode && globalThis.process.platform === "win32";

		const isV8 = new Error().stack?.includes("\n    at ") ?? false;

Comments

Conversation

vouillon commented Jan 20, 2026

Uh oh!

adrien-n commented Jan 20, 2026

Uh oh!

hhugo commented Feb 2, 2026

Uh oh!

vouillon commented Feb 2, 2026

Uh oh!

hhugo commented Feb 15, 2026

Uh oh!

hhugo Feb 15, 2026

Choose a reason for hiding this comment

Uh oh!

hhugo Feb 15, 2026

Choose a reason for hiding this comment

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

3 participants