From ffb2b7cd5e2c0fb94c6d913f120b524d053072ae Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Fri, 1 Apr 2022 08:45:49 -0700 Subject: [PATCH] Update decoding of element/data segments in spec interpreter This commit updates the spec interpreter after the merge of the bulk memory proposal to align with the textual specification for the encoding of data and element segments. In the original MVP wasm a data/element segment started with a leb128 for the memory/table index, but this leb128 was repurposed as a flags byte in the bulk-memory/reference-types proposals since it was always zero in practice (and never an over-long zero such as `"\80\00"`). The spec interpreter, however, hasn't been updated and was still reading a u32 for the flags byte, so this commit updates it to instead read a single byte. The tests have been updated accordingly. Tests for overlong or invalid index encodings were updated to use an encoding that explicitly specifies the index (the prefix `"\02"` byte for both data and element segments). New tests were added to ensure that an overlong encoding of 0, which was previously valid, is no longer valid. 
--- interpreter/binary/decode.ml | 26 +++++++++++++------------- test/core/binary-leb128.wast | 19 ++++++++++++------- test/core/binary.wast | 29 +++++++++++++++++++++++++++-- 3 files changed, 52 insertions(+), 22 deletions(-) diff --git a/interpreter/binary/decode.ml b/interpreter/binary/decode.ml index 7ed4a9235e..1627be0d65 100644 --- a/interpreter/binary/decode.ml +++ b/interpreter/binary/decode.ml @@ -951,41 +951,41 @@ let elem_kind s = | _ -> error s (pos s - 1) "malformed element kind" let elem s = - match vu32 s with - | 0x00l -> + match u8 s with + | 0x00 -> let emode = at active_zero s in let einit = vec (at elem_index) s in {etype = FuncRefType; einit; emode} - | 0x01l -> + | 0x01 -> let emode = at passive s in let etype = elem_kind s in let einit = vec (at elem_index) s in {etype; einit; emode} - | 0x02l -> + | 0x02 -> let emode = at active s in let etype = elem_kind s in let einit = vec (at elem_index) s in {etype; einit; emode} - | 0x03l -> + | 0x03 -> let emode = at declarative s in let etype = elem_kind s in let einit = vec (at elem_index) s in {etype; einit; emode} - | 0x04l -> + | 0x04 -> let emode = at active_zero s in let einit = vec const s in {etype = FuncRefType; einit; emode} - | 0x05l -> + | 0x05 -> let emode = at passive s in let etype = ref_type s in let einit = vec const s in {etype; einit; emode} - | 0x06l -> + | 0x06 -> let emode = at active s in let etype = ref_type s in let einit = vec const s in {etype; einit; emode} - | 0x07l -> + | 0x07 -> let emode = at declarative s in let etype = ref_type s in let einit = vec const s in @@ -999,16 +999,16 @@ let elem_section s = (* Data section *) let data s = - match vu32 s with - | 0x00l -> + match u8 s with + | 0x00 -> let dmode = at active_zero s in let dinit = string s in {dinit; dmode} - | 0x01l -> + | 0x01 -> let dmode = at passive s in let dinit = string s in {dinit; dmode} - | 0x02l -> + | 0x02 -> let dmode = at active s in let dinit = string s in {dinit; dmode} diff --git 
a/test/core/binary-leb128.wast b/test/core/binary-leb128.wast index 1d67219583..e6a080f8e5 100644 --- a/test/core/binary-leb128.wast +++ b/test/core/binary-leb128.wast @@ -25,7 +25,8 @@ "\00asm" "\01\00\00\00" "\05\03\01" ;; Memory section with 1 entry "\00\00" ;; no max, minimum 0 - "\0b\07\01" ;; Data section with 1 entry + "\0b\08\01" ;; Data section with 1 entry + "\02" ;; Data with explicit memory index "\80\00" ;; Memory index 0, encoded with 2 bytes "\41\00\0b\00" ;; (i32.const 0) with contents "" ) @@ -236,7 +237,8 @@ "\00asm" "\01\00\00\00" "\05\03\01" ;; Memory section with 1 entry "\00\00" ;; no max, minimum 0 - "\0b\0b\01" ;; Data section with 1 entry + "\0b\0c\01" ;; Data section with 1 entry + "\02" ;; Data with explicit memory index "\80\80\80\80\80\00" ;; Memory index 0 with one byte too many "\41\00\0b\00" ;; (i32.const 0) with contents "" ) @@ -247,9 +249,10 @@ "\00asm" "\01\00\00\00" "\04\04\01" ;; Table section with 1 entry "\70\00\00" ;; no max, minimum 0, funcref - "\09\0b\01" ;; Element section with 1 entry + "\09\0d\01" ;; Element section with 1 entry + "\02" ;; Element with explicit table index "\80\80\80\80\80\00" ;; Table index 0 with one byte too many - "\41\00\0b\00" ;; (i32.const 0) with no elements + "\41\00\0b\00\00" ;; (i32.const 0) with no func elements ) "integer representation too long" ) @@ -561,7 +564,8 @@ "\00asm" "\01\00\00\00" "\05\03\01" ;; Memory section with 1 entry "\00\00" ;; no max, minimum 0 - "\0b\0a\01" ;; Data section with 1 entry + "\0b\0b\01" ;; Data section with 1 entry + "\02" ;; Data with explicit memory index "\80\80\80\80\10" ;; Memory index 0 with unused bits set "\41\00\0b\00" ;; (i32.const 0) with contents "" ) @@ -572,9 +576,10 @@ "\00asm" "\01\00\00\00" "\04\04\01" ;; Table section with 1 entry "\70\00\00" ;; no max, minimum 0, funcref - "\09\0a\01" ;; Element section with 1 entry + "\09\0c\01" ;; Element section with 1 entry + "\02" ;; Element with explicit table index "\80\80\80\80\10" ;; Table index 
0 with unused bits set - "\41\00\0b\00" ;; (i32.const 0) with no elements + "\41\00\0b\00\00" ;; (i32.const 0) with no elements ) "integer too large" ) diff --git a/test/core/binary.wast b/test/core/binary.wast index 31bb24a937..ad067683cf 100644 --- a/test/core/binary.wast +++ b/test/core/binary.wast @@ -145,11 +145,24 @@ "\00asm" "\01\00\00\00" "\05\03\01" ;; Memory section with 1 entry "\00\00" ;; no max, minimum 0 - "\0b\07\01" ;; Data section with 1 entry - "\80\00" ;; Memory index 0, encoded with 2 bytes + "\0b\08\01" ;; Data section with 1 entry + "\02\80\00" ;; Memory index 0, encoded with 2 bytes "\41\00\0b\00" ;; (i32.const 0) with contents "" ) +;; This was a historically valid module in the MVP spec, but this is no longer +;; valid after the bulk-memory changes were merged in. +(assert_malformed + (module binary + "\00asm" "\01\00\00\00" + "\05\03\01" ;; Memory section with 1 entry + "\00\00" ;; no max, minimum 0 + "\0b\07\01" ;; Data section with 1 entry + "\80\00" ;; Memory index 0, encoded with 2 bytes + "\41\00\0b\00" ;; (i32.const 0) with contents "" + ) + "malformed data segment kind") + ;; Element segment table index can have non-minimal length (module binary "\00asm" "\01\00\00\00" @@ -160,6 +173,18 @@ "\41\00\0b\00\00" ;; (i32.const 0) with no elements ) +;; A bare overlong table index 0 was valid in the MVP, but is now read as a malformed flags byte +(assert_malformed + (module binary + "\00asm" "\01\00\00\00" + "\04\04\01" ;; Table section with 1 entry + "\70\00\00" ;; no max, minimum 0, funcref + "\09\07\01" ;; Element section with 1 entry + "\80\00" ;; Table index 0, encoded with 2 bytes + "\41\00\0b\00" ;; (i32.const 0) with no elements + ) + "malformed elements segment kind") + ;; Type section with signed LEB128 encoded type (assert_malformed (module binary