Skip to content

Commit 1a75ba6

Browse files
committed
Workaround for a TextDecoder bug in Safari causing a RangeError to be thrown
`TextDecoder` in Safari has a limitation that causes it to throw `RangeError` after decoding more than 2GiB of data. This causes long-running wasm programs that need to use `TextDecoder` to crash and start throwing `RuntimeError` with the message "Out of bounds memory access". We work around the issue by tracking how much data has been decoded by any given `TextDecoder` and replacing it when it comes close to 2GiB, minus a small margin of 1MiB, which has been empirically shown to reduce the likelihood of a `RangeError` being thrown due to miscounting (for unknown reasons). This commit also adds stricter handling of the kind of declaration used for `TextDecoder` and `TextEncoder`: `TextDecoder` always uses `let` because it needs to be mutable, and `TextEncoder` always uses `const` because it doesn't need to be mutable. Fixes #4471
1 parent c35cc93 commit 1a75ba6

22 files changed

+506
-162
lines changed

crates/cli-support/src/js/mod.rs

+120-16
Original file line numberDiff line numberDiff line change
@@ -1714,10 +1714,10 @@ __wbg_set_wasm(wasm);"
17141714
if !self.should_write_global("text_encoder") {
17151715
return Ok(());
17161716
}
1717-
self.expose_text_processor("TextEncoder", "encode", "('utf-8')", None)
1717+
self.expose_text_processor("const", "TextEncoder", "encode", "('utf-8')", None)
17181718
}
17191719

1720-
fn expose_text_decoder(&mut self) -> Result<(), Error> {
1720+
fn expose_text_decoder(&mut self, mem: &MemView, memory: MemoryId) -> Result<(), Error> {
17211721
if !self.should_write_global("text_decoder") {
17221722
return Ok(());
17231723
}
@@ -1729,22 +1729,77 @@ __wbg_set_wasm(wasm);"
17291729
// `ignoreBOM` is needed so that the BOM will be preserved when sending a string from Rust to JS
17301730
// `fatal` is needed to catch any weird encoding bugs when sending a string from Rust to JS
17311731
self.expose_text_processor(
1732+
"let",
17321733
"TextDecoder",
17331734
"decode",
17341735
"('utf-8', { ignoreBOM: true, fatal: true })",
17351736
init,
17361737
)?;
17371738

1739+
let text_decoder_decode = self.generate_text_decoder_decode(mem, memory)?;
1740+
match &self.config.mode {
1741+
OutputMode::Bundler { .. } | OutputMode::Web => {
1742+
// For targets that can run in a browser, we need a workaround for the fact that
1743+
// (at least) Safari 16 to 18 has a TextDecoder that can't decode anymore after
1744+
// processing 2GiB of data. The workaround is that we keep track of how much the
1745+
// decoder has decoded and just create a new decoder when we're getting close to
1746+
// the limit.
1747+
// See MAX_SAFARI_DECODE_BYTES below for link to bug report.
1748+
1749+
let cached_text_processor = self.generate_cached_text_processor_init(
1750+
"TextDecoder",
1751+
"decode",
1752+
"('utf-8', { ignoreBOM: true, fatal: true })",
1753+
)?;
1754+
1755+
// Maximum number of bytes Safari can handle for one TextDecoder is 2GiB (2147483648)
1756+
// but empirically it seems to crash a bit before the end, so we remove 1MiB of margin.
1757+
// Workaround for a bug in Safari.
1758+
// See https://github.com/rustwasm/wasm-bindgen/issues/4471
1759+
const MAX_SAFARI_DECODE_BYTES: u32 = 2147483648 - 1048576;
1760+
self.global(&format!(
1761+
"
1762+
const MAX_SAFARI_DECODE_BYTES = {0};
1763+
let numBytesDecoded = 0;
1764+
function decodeText(ptr, len) {{
1765+
numBytesDecoded += len;
1766+
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {{
1767+
{1}
1768+
cachedTextDecoder.decode();
1769+
numBytesDecoded = len;
1770+
}}
1771+
return {2};
1772+
}}
1773+
",
1774+
MAX_SAFARI_DECODE_BYTES, cached_text_processor, text_decoder_decode,
1775+
));
1776+
}
1777+
_ => {
1778+
// For any non-browser target, we can just use the TextDecoder without any workarounds.
1779+
// For browser targets, see the workaround for Safari above.
1780+
self.global(&format!(
1781+
"
1782+
function decodeText(ptr, len) {{
1783+
return {};
1784+
}}
1785+
",
1786+
text_decoder_decode,
1787+
));
1788+
}
1789+
}
1790+
17381791
Ok(())
17391792
}
17401793

17411794
fn expose_text_processor(
17421795
&mut self,
1796+
decl_kind: &str,
17431797
s: &str,
17441798
op: &str,
17451799
args: &str,
17461800
init: Option<&str>,
17471801
) -> Result<(), Error> {
1802+
let cached_text_processor_init = self.generate_cached_text_processor_init(s, op, args)?;
17481803
match &self.config.mode {
17491804
OutputMode::Node { .. } => {
17501805
let name = self.import_name(&JsImport {
@@ -1754,7 +1809,9 @@ __wbg_set_wasm(wasm);"
17541809
},
17551810
fields: Vec::new(),
17561811
})?;
1757-
self.global(&format!("let cached{} = new {}{};", s, name, args));
1812+
// decl_kind is the kind of the declaration: let or const
1813+
// cached_text_processor_init is the rest of the statement for initializing a cached text processor
1814+
self.global(&format!("{} {}", decl_kind, cached_text_processor_init));
17581815
}
17591816
OutputMode::Bundler {
17601817
browser_only: false,
@@ -1766,13 +1823,15 @@ __wbg_set_wasm(wasm);"
17661823
",
17671824
s
17681825
));
1769-
self.global(&format!("let cached{0} = new l{0}{1};", s, args));
1826+
self.global(&format!("{} {}", decl_kind, cached_text_processor_init));
17701827
}
17711828
OutputMode::Deno
17721829
| OutputMode::Web
17731830
| OutputMode::NoModules { .. }
17741831
| OutputMode::Bundler { browser_only: true } => {
1775-
self.global(&format!("const cached{0} = (typeof {0} !== 'undefined' ? new {0}{1} : {{ {2}: () => {{ throw Error('{0} not available') }} }} );", s, args, op))
1832+
// decl_kind is the kind of the declaration: let or const
1833+
// cached_text_processor_init is the rest of the statement for initializing a cached text processor
1834+
self.global(&format!("{} {}", decl_kind, cached_text_processor_init))
17761835
}
17771836
};
17781837

@@ -1795,9 +1854,43 @@ __wbg_set_wasm(wasm);"
17951854
Ok(())
17961855
}
17971856

1857+
/// Generates a partial text processor statement, everything except the declaration kind,
1858+
/// i.e. everything except for `const` or `let` which the caller needs to handle itself.
1859+
fn generate_cached_text_processor_init(
1860+
&mut self,
1861+
s: &str,
1862+
op: &str,
1863+
args: &str,
1864+
) -> Result<String, Error> {
1865+
let new_cached_text_procesor = match &self.config.mode {
1866+
OutputMode::Node { .. } => {
1867+
let name = self.import_name(&JsImport {
1868+
name: JsImportName::Module {
1869+
module: "util".to_string(),
1870+
name: s.to_string(),
1871+
},
1872+
fields: Vec::new(),
1873+
})?;
1874+
format!("cached{} = new {}{};", s, name, args)
1875+
}
1876+
OutputMode::Bundler {
1877+
browser_only: false,
1878+
} => {
1879+
format!("cached{0} = new l{0}{1};", s, args)
1880+
}
1881+
OutputMode::Deno
1882+
| OutputMode::Web
1883+
| OutputMode::NoModules { .. }
1884+
| OutputMode::Bundler { browser_only: true } => {
1885+
format!("cached{0} = (typeof {0} !== 'undefined' ? new {0}{1} : {{ {2}: () => {{ throw Error('{0} not available') }} }} );", s, args, op)
1886+
}
1887+
};
1888+
Ok(new_cached_text_procesor)
1889+
}
1890+
17981891
fn expose_get_string_from_wasm(&mut self, memory: MemoryId) -> Result<MemView, Error> {
1799-
self.expose_text_decoder()?;
18001892
let mem = self.expose_uint8_memory(memory);
1893+
self.expose_text_decoder(&mem, memory)?;
18011894
let ret = MemView {
18021895
name: "getStringFromWasm".into(),
18031896
num: mem.num,
@@ -1807,6 +1900,23 @@ __wbg_set_wasm(wasm);"
18071900
return Ok(ret);
18081901
}
18091902

1903+
self.global(&format!(
1904+
"
1905+
function {}(ptr, len) {{
1906+
ptr = ptr >>> 0;
1907+
return decodeText(ptr, len);
1908+
}}
1909+
",
1910+
ret,
1911+
));
1912+
Ok(ret)
1913+
}
1914+
1915+
fn generate_text_decoder_decode(
1916+
&self,
1917+
mem: &MemView,
1918+
memory: MemoryId,
1919+
) -> Result<String, Error> {
18101920
// Typically we try to give a raw view of memory out to `TextDecoder` to
18111921
// avoid copying too much data. If, however, a `SharedArrayBuffer` is
18121922
// being used it looks like that is rejected by `TextDecoder` or
@@ -1818,16 +1928,10 @@ __wbg_set_wasm(wasm);"
18181928
let is_shared = self.module.memories.get(memory).shared;
18191929
let method = if is_shared { "slice" } else { "subarray" };
18201930

1821-
self.global(&format!(
1822-
"
1823-
function {}(ptr, len) {{
1824-
ptr = ptr >>> 0;
1825-
return cachedTextDecoder.decode({}().{}(ptr, ptr + len));
1826-
}}
1827-
",
1828-
ret, mem, method
1829-
));
1830-
Ok(ret)
1931+
Ok(format!(
1932+
"cachedTextDecoder.decode({}().{}(ptr, ptr + len))",
1933+
mem, method
1934+
))
18311935
}
18321936

18331937
fn expose_get_cached_string_from_wasm(

crates/cli/tests/reference/anyref-import-catch.js

+19-7
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,6 @@ function handleError(f, args) {
1919
}
2020
}
2121

22-
const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;
23-
24-
let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
25-
26-
cachedTextDecoder.decode();
27-
2822
let cachedUint8ArrayMemory0 = null;
2923

3024
function getUint8ArrayMemory0() {
@@ -34,9 +28,27 @@ function getUint8ArrayMemory0() {
3428
return cachedUint8ArrayMemory0;
3529
}
3630

31+
const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;
32+
33+
let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
34+
35+
cachedTextDecoder.decode();
36+
37+
const MAX_SAFARI_DECODE_BYTES = 2146435072;
38+
let numBytesDecoded = 0;
39+
function decodeText(ptr, len) {
40+
numBytesDecoded += len;
41+
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
42+
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
43+
cachedTextDecoder.decode();
44+
numBytesDecoded = len;
45+
}
46+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
47+
}
48+
3749
function getStringFromWasm0(ptr, len) {
3850
ptr = ptr >>> 0;
39-
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
51+
return decodeText(ptr, len);
4052
}
4153

4254
function takeFromExternrefTable0(idx) {

crates/cli/tests/reference/builder.js

+19-7
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,6 @@ export function __wbg_set_wasm(val) {
44
}
55

66

7-
const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;
8-
9-
let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
10-
11-
cachedTextDecoder.decode();
12-
137
let cachedUint8ArrayMemory0 = null;
148

159
function getUint8ArrayMemory0() {
@@ -19,9 +13,27 @@ function getUint8ArrayMemory0() {
1913
return cachedUint8ArrayMemory0;
2014
}
2115

16+
const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;
17+
18+
let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
19+
20+
cachedTextDecoder.decode();
21+
22+
const MAX_SAFARI_DECODE_BYTES = 2146435072;
23+
let numBytesDecoded = 0;
24+
function decodeText(ptr, len) {
25+
numBytesDecoded += len;
26+
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
27+
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
28+
cachedTextDecoder.decode();
29+
numBytesDecoded = len;
30+
}
31+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
32+
}
33+
2234
function getStringFromWasm0(ptr, len) {
2335
ptr = ptr >>> 0;
24-
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
36+
return decodeText(ptr, len);
2537
}
2638

2739
const ClassBuilderFinalization = (typeof FinalizationRegistry === 'undefined')

crates/cli/tests/reference/constructor.js

+19-7
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,6 @@ export function __wbg_set_wasm(val) {
44
}
55

66

7-
const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;
8-
9-
let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
10-
11-
cachedTextDecoder.decode();
12-
137
let cachedUint8ArrayMemory0 = null;
148

159
function getUint8ArrayMemory0() {
@@ -19,9 +13,27 @@ function getUint8ArrayMemory0() {
1913
return cachedUint8ArrayMemory0;
2014
}
2115

16+
const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder;
17+
18+
let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
19+
20+
cachedTextDecoder.decode();
21+
22+
const MAX_SAFARI_DECODE_BYTES = 2146435072;
23+
let numBytesDecoded = 0;
24+
function decodeText(ptr, len) {
25+
numBytesDecoded += len;
26+
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
27+
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
28+
cachedTextDecoder.decode();
29+
numBytesDecoded = len;
30+
}
31+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
32+
}
33+
2234
function getStringFromWasm0(ptr, len) {
2335
ptr = ptr >>> 0;
24-
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
36+
return decodeText(ptr, len);
2537
}
2638

2739
const ClassConstructorFinalization = (typeof FinalizationRegistry === 'undefined')

crates/cli/tests/reference/echo.js

+14-2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ function getUint8ArrayMemory0() {
8282

8383
const lTextEncoder = typeof TextEncoder === 'undefined' ? (0, module.require)('util').TextEncoder : TextEncoder;
8484

85-
let cachedTextEncoder = new lTextEncoder('utf-8');
85+
const cachedTextEncoder = new lTextEncoder('utf-8');
8686

8787
const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
8888
? function (arg, view) {
@@ -155,9 +155,21 @@ let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true
155155

156156
cachedTextDecoder.decode();
157157

158+
const MAX_SAFARI_DECODE_BYTES = 2146435072;
159+
let numBytesDecoded = 0;
160+
function decodeText(ptr, len) {
161+
numBytesDecoded += len;
162+
if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) {
163+
cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true });
164+
cachedTextDecoder.decode();
165+
numBytesDecoded = len;
166+
}
167+
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
168+
}
169+
158170
function getStringFromWasm0(ptr, len) {
159171
ptr = ptr >>> 0;
160-
return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len));
172+
return decodeText(ptr, len);
161173
}
162174
/**
163175
* @param {number} a

0 commit comments

Comments
 (0)