Skip to content

Workaround for a TextDecoder bug in Safari causing a RangeError to be thrown #4472

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 120 additions & 23 deletions crates/cli-support/src/js/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1714,10 +1714,10 @@ __wbg_set_wasm(wasm);"
if !self.should_write_global("text_encoder") {
return Ok(());
}
self.expose_text_processor("TextEncoder", "encode", "('utf-8')", None)
self.expose_text_processor("const", "TextEncoder", "encode", "('utf-8')", None)
}

fn expose_text_decoder(&mut self) -> Result<(), Error> {
fn expose_text_decoder(&mut self, mem: &MemView, memory: MemoryId) -> Result<(), Error> {
if !self.should_write_global("text_decoder") {
return Ok(());
}
Expand All @@ -1729,32 +1729,82 @@ __wbg_set_wasm(wasm);"
// `ignoreBOM` is needed so that the BOM will be preserved when sending a string from Rust to JS
// `fatal` is needed to catch any weird encoding bugs when sending a string from Rust to JS
self.expose_text_processor(
"let",
"TextDecoder",
"decode",
"('utf-8', { ignoreBOM: true, fatal: true })",
init,
)?;

let text_decoder_decode = self.generate_text_decoder_decode(mem, memory)?;
match &self.config.mode {
OutputMode::Bundler { .. } | OutputMode::Web => {
// For targets that can run in a browser, we need a workaround for the fact that
// (at least) Safari 16 to 18 has a TextDecoder that can't decode anymore after
// processing 2GiB of data. The workaround is that we keep track of how much the
// decoder has decoded and just create a new decoder when we're getting close to
// the limit.
// See MAX_SAFARI_DECODE_BYTES below for link to bug report.

let cached_text_processor = self.generate_cached_text_processor_init(
"TextDecoder",
"decode",
"('utf-8', { ignoreBOM: true, fatal: true })",
)?;

// Maximum number of bytes Safari can handle for one TextDecoder is 2GiB (2147483648)
// but empirically it seems to crash a bit before the end, so we remove 1MiB of margin.
// Workaround for a bug in Safari.
// See https://github.com/rustwasm/wasm-bindgen/issues/4471
const MAX_SAFARI_DECODE_BYTES: u32 = 2147483648 - 1048576;
self.global(&format!(
"
const MAX_SAFARI_DECODE_BYTES = {0};
let numBytesDecoded = 0;
function decodeText(ptr, len) {{
numBytesDecoded += len;
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {{
{1}
cachedTextDecoder.decode();
numBytesDecoded = len;
}}
return {2};
}}
",
MAX_SAFARI_DECODE_BYTES, cached_text_processor, text_decoder_decode,
));
}
_ => {
// For any non-browser target, we can use the TextDecoder directly without any workaround.
// For browser targets, see the Safari workaround above.
self.global(&format!(
"
function decodeText(ptr, len) {{
return {};
}}
",
text_decoder_decode,
));
}
}

Ok(())
}

fn expose_text_processor(
&mut self,
decl_kind: &str,
s: &str,
op: &str,
args: &str,
init: Option<&str>,
) -> Result<(), Error> {
let cached_text_processor_init = self.generate_cached_text_processor_init(s, op, args)?;
match &self.config.mode {
OutputMode::Node { .. } => {
let name = self.import_name(&JsImport {
name: JsImportName::Module {
module: "util".to_string(),
name: s.to_string(),
},
fields: Vec::new(),
})?;
self.global(&format!("let cached{} = new {}{};", s, name, args));
// decl_kind is the kind of the declaration: let or const
// cached_text_processor_init is the rest of the statement for initializing a cached text processor
self.global(&format!("{} {}", decl_kind, cached_text_processor_init));
}
OutputMode::Bundler {
browser_only: false,
Expand All @@ -1766,13 +1816,15 @@ __wbg_set_wasm(wasm);"
",
s
));
self.global(&format!("let cached{0} = new l{0}{1};", s, args));
self.global(&format!("{} {}", decl_kind, cached_text_processor_init));
}
OutputMode::Deno
| OutputMode::Web
| OutputMode::NoModules { .. }
| OutputMode::Bundler { browser_only: true } => {
self.global(&format!("const cached{0} = (typeof {0} !== 'undefined' ? new {0}{1} : {{ {2}: () => {{ throw Error('{0} not available') }} }} );", s, args, op))
// decl_kind is the kind of the declaration: let or const
// cached_text_processor_init is the rest of the statement for initializing a cached text processor
self.global(&format!("{} {}", decl_kind, cached_text_processor_init))
}
};

Expand All @@ -1795,9 +1847,43 @@ __wbg_set_wasm(wasm);"
Ok(())
}

/// Builds the assignment half of a cached text processor declaration.
///
/// The returned string has the shape `cached<s> = <initializer>;` and deliberately leaves out
/// the leading `let`/`const`, so the caller decides whether to prepend a declaration keyword
/// or to emit the string as-is.
///
/// * `s` - name of the JS class; it is also appended to `cached` to form the variable name.
/// * `op` - method name given to the throwing stub object that is used when the class is not
///   defined at runtime (only in the Deno / Web / NoModules / browser-only Bundler output).
/// * `args` - text placed directly after the class name in the `new` expression.
///
/// # Errors
///
/// Propagates any error returned by `import_name` in the Node output mode.
fn generate_cached_text_processor_init(
    &mut self,
    s: &str,
    op: &str,
    args: &str,
) -> Result<String, Error> {
    let init = match &self.config.mode {
        // Node: the class is imported from the `util` module and instantiated directly.
        OutputMode::Node { .. } => {
            let util_import = JsImport {
                name: JsImportName::Module {
                    module: "util".to_string(),
                    name: s.to_string(),
                },
                fields: Vec::new(),
            };
            let imported = self.import_name(&util_import)?;
            format!("cached{} = new {}{};", s, imported, args)
        }
        // Bundler that may also run outside a browser: instantiate through the `l<s>`
        // binding. NOTE(review): that binding is presumably emitted by the caller before this
        // statement is used - confirm against `expose_text_processor`.
        OutputMode::Bundler {
            browser_only: false,
        } => {
            format!("cached{0} = new l{0}{1};", s, args)
        }
        // Remaining targets: use the global class when it exists, otherwise fall back to a
        // stub object whose `op` method throws.
        OutputMode::Deno
        | OutputMode::Web
        | OutputMode::NoModules { .. }
        | OutputMode::Bundler { browser_only: true } => {
            format!("cached{0} = (typeof {0} !== 'undefined' ? new {0}{1} : {{ {2}: () => {{ throw Error('{0} not available') }} }} );", s, args, op)
        }
    };
    Ok(init)
}

fn expose_get_string_from_wasm(&mut self, memory: MemoryId) -> Result<MemView, Error> {
self.expose_text_decoder()?;
let mem = self.expose_uint8_memory(memory);
self.expose_text_decoder(&mem, memory)?;
let ret = MemView {
name: "getStringFromWasm".into(),
num: mem.num,
Expand All @@ -1807,6 +1893,23 @@ __wbg_set_wasm(wasm);"
return Ok(ret);
}

self.global(&format!(
"
function {}(ptr, len) {{
ptr = ptr >>> 0;
return decodeText(ptr, len);
}}
",
ret,
));
Ok(ret)
}

fn generate_text_decoder_decode(
&self,
mem: &MemView,
memory: MemoryId,
) -> Result<String, Error> {
// Typically we try to give a raw view of memory out to `TextDecoder` to
// avoid copying too much data. If, however, a `SharedArrayBuffer` is
// being used it looks like that is rejected by `TextDecoder` or
Expand All @@ -1818,16 +1921,10 @@ __wbg_set_wasm(wasm);"
let is_shared = self.module.memories.get(memory).shared;
let method = if is_shared { "slice" } else { "subarray" };

self.global(&format!(
"
function {}(ptr, len) {{
ptr = ptr >>> 0;
return cachedTextDecoder.decode({}().{}(ptr, ptr + len));
}}
",
ret, mem, method
));
Ok(ret)
Ok(format!(
"cachedTextDecoder.decode({}().{}(ptr, ptr + len))",
mem, method
))
}

fn expose_get_cached_string_from_wasm(
Expand Down
26 changes: 19 additions & 7 deletions crates/cli/tests/reference/anyref-import-catch.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,6 @@ function handleError(f, args) {
}
}

const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;

let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });

cachedTextDecoder.decode();

let cachedUint8ArrayMemory0 = null;

function getUint8ArrayMemory0() {
Expand All @@ -34,9 +28,27 @@ function getUint8ArrayMemory0() {
return cachedUint8ArrayMemory0;
}

const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;

let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });

cachedTextDecoder.decode();

const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
function decodeText(ptr, len) {
numBytesDecoded += len;
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();
numBytesDecoded = len;
}
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
}

function getStringFromWasm0(ptr, len) {
ptr = ptr >>> 0;
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
return decodeText(ptr, len);
}

function takeFromExternrefTable0(idx) {
Expand Down
26 changes: 19 additions & 7 deletions crates/cli/tests/reference/builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@ export function __wbg_set_wasm(val) {
}


const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;

let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });

cachedTextDecoder.decode();

let cachedUint8ArrayMemory0 = null;

function getUint8ArrayMemory0() {
Expand All @@ -19,9 +13,27 @@ function getUint8ArrayMemory0() {
return cachedUint8ArrayMemory0;
}

const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;

let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });

cachedTextDecoder.decode();

const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
function decodeText(ptr, len) {
numBytesDecoded += len;
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();
numBytesDecoded = len;
}
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
}

function getStringFromWasm0(ptr, len) {
ptr = ptr >>> 0;
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
return decodeText(ptr, len);
}

const ClassBuilderFinalization = (typeof FinalizationRegistry === 'undefined')
Expand Down
26 changes: 19 additions & 7 deletions crates/cli/tests/reference/constructor.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,6 @@ export function __wbg_set_wasm(val) {
}


const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;

let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });

cachedTextDecoder.decode();

let cachedUint8ArrayMemory0 = null;

function getUint8ArrayMemory0() {
Expand All @@ -19,9 +13,27 @@ function getUint8ArrayMemory0() {
return cachedUint8ArrayMemory0;
}

const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;

let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });

cachedTextDecoder.decode();

const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
function decodeText(ptr, len) {
numBytesDecoded += len;
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();
numBytesDecoded = len;
}
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
}

function getStringFromWasm0(ptr, len) {
ptr = ptr >>> 0;
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
return decodeText(ptr, len);
}

const ClassConstructorFinalization = (typeof FinalizationRegistry === 'undefined')
Expand Down
16 changes: 14 additions & 2 deletions crates/cli/tests/reference/echo.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ function getUint8ArrayMemory0() {

const lTextEncoder = typeof TextEncoder === 'undefined' ? (0, module.require)('util').TextEncoder : TextEncoder;

let cachedTextEncoder = new lTextEncoder('utf-8');
const cachedTextEncoder = new lTextEncoder('utf-8');

const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
? function (arg, view) {
Expand Down Expand Up @@ -155,9 +155,21 @@ let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true

cachedTextDecoder.decode();

const MAX_SAFARI_DECODE_BYTES = 2146435072;
let numBytesDecoded = 0;
function decodeText(ptr, len) {
numBytesDecoded += len;
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
cachedTextDecoder.decode();
numBytesDecoded = len;
}
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
}

function getStringFromWasm0(ptr, len) {
ptr = ptr >>> 0;
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
return decodeText(ptr, len);
}
/**
* @param {number} a
Expand Down
Loading
Loading