Skip to content

Add string conversion error modes #1902

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 15 additions & 8 deletions lib/loader/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,24 @@ const ARRAY_SIZE = 16;
const BIGINT = typeof BigUint64Array !== "undefined";
const THIS = Symbol();

const STRING_DECODE_THRESHOLD = 32;
const decoder = new TextDecoder("utf-16le");
const STRING_SMALLSIZE = 192; // break-even point in V8
const STRING_CHUNKSIZE = 1024; // mitigate stack overflow
const utf16 = new TextDecoder("utf-16le", { fatal: true }); // != wtf16

/** Gets a string from an U32 and an U16 view on a memory. */
/** Gets a string from memory. */
function getStringImpl(buffer, ptr) {
const len = new Uint32Array(buffer)[ptr + SIZE_OFFSET >>> 2] >>> 1;
const arr = new Uint16Array(buffer, ptr, len);
if (len <= STRING_DECODE_THRESHOLD) {
return String.fromCharCode.apply(String, arr);
let len = new Uint32Array(buffer)[ptr + SIZE_OFFSET >>> 2] >>> 1;
const wtf16 = new Uint16Array(buffer, ptr, len);
if (len <= STRING_SMALLSIZE) return String.fromCharCode(...wtf16);
try {
return utf16.decode(wtf16);
} catch {
let str = "", off = 0;
while (len - off > STRING_CHUNKSIZE) {
str += String.fromCharCode(...wtf16.subarray(off, off += STRING_CHUNKSIZE));
}
return str + String.fromCharCode(...wtf16.subarray(off));
}
return decoder.decode(arr);
}

/** Prepares the base module prior to instantiation. */
Expand Down
7 changes: 5 additions & 2 deletions lib/loader/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,8 @@
"umd/index.js",
"umd/package.json",
"README.md"
]
}
],
"devDependencies": {
"esm2umd": "^0.1.2"
}
}
Binary file modified lib/loader/tests/build/default.wasm
Binary file not shown.
Binary file modified lib/loader/tests/build/legacy.wasm
Binary file not shown.
45 changes: 30 additions & 15 deletions lib/loader/umd/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,33 @@ var loader = (function(exports) {
const ARRAY_SIZE = 16;
const BIGINT = typeof BigUint64Array !== "undefined";
const THIS = Symbol();
const STRING_DECODE_THRESHOLD = 32;
const decoder = new TextDecoder("utf-16le");
/** Gets a string from an U32 and an U16 view on a memory. */
const STRING_SMALLSIZE = 192; // break-even point in V8

const STRING_CHUNKSIZE = 1024; // mitigate stack overflow

const utf16 = new TextDecoder("utf-16le", {
fatal: true
}); // != wtf16

/** Gets a string from memory. */

function getStringImpl(buffer, ptr) {
const len = new Uint32Array(buffer)[ptr + SIZE_OFFSET >>> 2] >>> 1;
const arr = new Uint16Array(buffer, ptr, len);
let len = new Uint32Array(buffer)[ptr + SIZE_OFFSET >>> 2] >>> 1;
const wtf16 = new Uint16Array(buffer, ptr, len);
if (len <= STRING_SMALLSIZE) return String.fromCharCode(...wtf16);

try {
return utf16.decode(wtf16);
} catch {
let str = "",
off = 0;

while (len - off > STRING_CHUNKSIZE) {
str += String.fromCharCode(...wtf16.subarray(off, off += STRING_CHUNKSIZE));
}

if (len <= STRING_DECODE_THRESHOLD) {
return String.fromCharCode.apply(String, arr);
return str + String.fromCharCode(...wtf16.subarray(off));
}

return decoder.decode(arr);
}
/** Prepares the base module prior to instantiation. */

Expand Down Expand Up @@ -110,9 +124,10 @@ var loader = (function(exports) {

const __collect = exports.__collect || F_NOEXPORTRUNTIME;

const __rtti_base = exports.__rtti_base || ~0; // oob if not present


const __rtti_base = exports.__rtti_base;
// Returns the u32 stored at `__rtti_base` in the given Uint32Array view; callers
// (`getInfo`, `getBase`) use that value as the exclusive upper bound for valid ids.
// When `__rtti_base` is falsy (e.g. the module does not export it),
// F_NOEXPORTRUNTIME is used in place of the reader.
const getRttiCount = __rtti_base ? function (arr) {
return arr[__rtti_base >>> 2];
} : F_NOEXPORTRUNTIME;
extendedExports.__new = __new;
extendedExports.__pin = __pin;
extendedExports.__unpin = __unpin;
Expand All @@ -121,7 +136,7 @@ var loader = (function(exports) {

function getInfo(id) {
const U32 = new Uint32Array(memory.buffer);
const count = U32[__rtti_base >>> 2];
const count = getRttiCount(U32);
if ((id >>>= 0) >= count) throw Error(`invalid id: ${id}`);
return U32[(__rtti_base + 4 >>> 2) + id * 2];
}
Expand All @@ -138,7 +153,7 @@ var loader = (function(exports) {

function getBase(id) {
const U32 = new Uint32Array(memory.buffer);
const count = U32[__rtti_base >>> 2];
const count = getRttiCount(U32);
if ((id >>>= 0) >= count) throw Error(`invalid id: ${id}`);
return U32[(__rtti_base + 4 >>> 2) + id * 2 + 1];
}
Expand Down Expand Up @@ -330,7 +345,7 @@ var loader = (function(exports) {
const U32 = new Uint32Array(memory.buffer);
let id = U32[ptr + ID_OFFSET >>> 2];

if (id <= U32[__rtti_base >>> 2]) {
if (id <= getRttiCount(U32)) {
do {
if (id == baseId) return true;
id = getBase(id);
Expand Down
17 changes: 13 additions & 4 deletions std/assembly/index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1773,12 +1773,21 @@ declare class String {
declare namespace String {
/** Encoding helpers for UTF-8. */
export namespace UTF8 {
/** Error modes selecting how UTF-8 encoding treats unpaired surrogates. */
export const enum ErrorMode {
/** Keeps unpaired surrogates as they are, as in WTF-8. This is the default. */
WTF8,
/** Replaces unpaired surrogates with the replacement character (U+FFFD). */
REPLACE,
/** Throws an error on unpaired surrogates. */
ERROR
}
/** Calculates the byte length of the specified string when encoded as UTF-8, optionally null terminated. */
export function byteLength(str: string, nullTerminated?: bool): i32;
/** Encodes the specified string to UTF-8 bytes, optionally null terminated. */
export function encode(str: string, nullTerminated?: bool): ArrayBuffer;
/** Encodes the specified raw string to UTF-8 bytes, optionally null terminated. Returns the number of bytes written. */
export function encodeUnsafe(str: usize, len: i32, buf: usize, nullTerminated?: bool): usize;
/** Encodes the specified string to UTF-8 bytes, optionally null terminated. ErrorMode defaults to WTF-8. */
export function encode(str: string, nullTerminated?: bool, errorMode?: ErrorMode): ArrayBuffer;
/** Encodes the specified raw string to UTF-8 bytes, optionally null terminated. ErrorMode defaults to WTF-8. Returns the number of bytes written. */
export function encodeUnsafe(str: usize, len: i32, buf: usize, nullTerminated?: bool, errorMode?: ErrorMode): usize;
/** Decodes the specified buffer from UTF-8 bytes to a string, optionally null terminated. */
export function decode(buf: ArrayBuffer, nullTerminated?: bool): string;
/** Decodes raw UTF-8 bytes to a string, optionally null terminated. */
Expand Down
48 changes: 33 additions & 15 deletions std/assembly/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { OBJECT, BLOCK_MAXSIZE, TOTAL_OVERHEAD } from "./rt/common";
import { compareImpl, strtol, strtod, isSpace, isAscii, isFinalSigma, toLower8, toUpper8 } from "./util/string";
import { SPECIALS_UPPER, casemap, bsearch } from "./util/casemap";
import { E_INDEXOUTOFRANGE, E_INVALIDLENGTH } from "./util/error";
import { E_INDEXOUTOFRANGE, E_INVALIDLENGTH, E_UNPAIRED_SURROGATE } from "./util/error";
import { idof } from "./builtins";
import { Array } from "./array";

Expand Down Expand Up @@ -661,6 +661,12 @@ export namespace String {

export namespace UTF8 {

/** UTF-8 encoding error modes, selecting how unpaired surrogates are handled. */
export const enum ErrorMode {
/** Keep unpaired surrogates unchanged (WTF-8). Default of `encode` and `encodeUnsafe`. */
WTF8,
/** Substitute the replacement character (U+FFFD) for an unpaired surrogate. */
REPLACE,
/** Throw an `Error` carrying `E_UNPAIRED_SURROGATE` on an unpaired surrogate. */
ERROR
}

export function byteLength(str: string, nullTerminated: bool = false): i32 {
var strOff = changetype<usize>(str);
var strEnd = strOff + <usize>changetype<OBJECT>(changetype<usize>(str) - TOTAL_OVERHEAD).rtSize;
Expand All @@ -687,15 +693,15 @@ export namespace String {
return bufLen;
}

export function encode(str: string, nullTerminated: bool = false): ArrayBuffer {
export function encode(str: string, nullTerminated: bool = false, errorMode: ErrorMode = ErrorMode.WTF8): ArrayBuffer {
var buf = changetype<ArrayBuffer>(__new(<usize>byteLength(str, nullTerminated), idof<ArrayBuffer>()));
encodeUnsafe(changetype<usize>(str), str.length, changetype<usize>(buf), nullTerminated);
encodeUnsafe(changetype<usize>(str), str.length, changetype<usize>(buf), nullTerminated, errorMode);
return buf;
}

// @ts-ignore: decorator
@unsafe
export function encodeUnsafe(str: usize, len: i32, buf: usize, nullTerminated: bool = false): usize {
export function encodeUnsafe(str: usize, len: i32, buf: usize, nullTerminated: bool = false, errorMode: ErrorMode = ErrorMode.WTF8): usize {
var strEnd = str + (<usize>len << 1);
var bufOff = buf;
while (str < strEnd) {
Expand All @@ -709,17 +715,29 @@ export namespace String {
store<u16>(bufOff, b1 << 8 | b0);
bufOff += 2;
} else {
if ((c1 & 0xFC00) == 0xD800 && str + 2 < strEnd) {
let c2 = <u32>load<u16>(str, 2);
if ((c2 & 0xFC00) == 0xDC00) {
c1 = 0x10000 + ((c1 & 0x03FF) << 10) | (c2 & 0x03FF);
let b0 = c1 >> 18 | 240;
let b1 = c1 >> 12 & 63 | 128;
let b2 = c1 >> 6 & 63 | 128;
let b3 = c1 & 63 | 128;
store<u32>(bufOff, b3 << 24 | b2 << 16 | b1 << 8 | b0);
bufOff += 4; str += 4;
continue;
// D800: 11011 0 0000000000 Lead
// DBFF: 11011 0 1111111111
// DC00: 11011 1 0000000000 Trail
// DFFF: 11011 1 1111111111
// F800: 11111 0 0000000000 Mask
// FC00: 11111 1 0000000000
if ((c1 & 0xF800) == 0xD800) {
if (c1 < 0xDC00 && str + 2 < strEnd) {
let c2 = <u32>load<u16>(str, 2);
if ((c2 & 0xFC00) == 0xDC00) {
c1 = 0x10000 + ((c1 & 0x03FF) << 10) | (c2 & 0x03FF);
let b0 = c1 >> 18 | 240;
let b1 = c1 >> 12 & 63 | 128;
let b2 = c1 >> 6 & 63 | 128;
let b3 = c1 & 63 | 128;
store<u32>(bufOff, b3 << 24 | b2 << 16 | b1 << 8 | b0);
bufOff += 4; str += 4;
continue;
}
}
if (errorMode != ErrorMode.WTF8) { // unlikely
if (errorMode == ErrorMode.ERROR) throw new Error(E_UNPAIRED_SURROGATE);
c1 = 0xFFFD;
}
}
let b0 = c1 >> 12 | 224;
Expand Down
4 changes: 4 additions & 0 deletions std/assembly/util/error.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,7 @@ export const E_URI_MALFORMED: string = "URI malformed";
// @ts-ignore: decorator
@lazy @inline
export const E_INVALIDDATE: string = "Invalid Date";

// Error message used by `String.UTF8.encodeUnsafe` when it meets an unpaired
// surrogate while running in `ErrorMode.ERROR`.
// @ts-ignore: decorator
@lazy @inline
export const E_UNPAIRED_SURROGATE: string = "Unpaired surrogate";
Loading