Skip to content

Commit 3b5834d

Browse files
authored
Remove more usages of JsStr in favor of just JsString (#4578)
Move the following to `JsString` (from `JsStr`): - `get` and `get_expect` now return `JsString`. Uses `code_point_at()` instead of `get(usize)` - Move the display types to using a ref to the `JsString` being displayed. Using a `JsString` reference instead of a clone makes it clearer that calling `string.display()` should be temporary (and has a lifetime). - Move trim to `JsString` and return `JsString` directly. - Have the code points iterator be part of the `JsString` API and its `vtable`. - Add inlines and other small time optimizations, like `trim()` not slicing twice. - This results in less conversions from JsStr to JsString. It's more performant because creating a `JsString` from a slice is as fast (thanks to `SliceString`) than a `JsStr` on a slice, but creating a `JsString` from it is faster now. The end goal is to get rid of `JsStr` in its current form. Benchmarks on my machine: | Benchmark | Main | This PR | |--------------|------|---------| | Richards | 235 | 255 | | DeltaBlue | 245 | 254 | | Crypto | 207 | 219 | | RayTrace | 485 | 525 | | EarleyBoyer | 594 | 632 | | RegExp | 87.2 | 90.4 | | Splay | 789 | 896 | | NavierStokes | 484 | 490 | | **Total** | 322 | 342 |
1 parent dd2d785 commit 3b5834d

File tree

20 files changed

+342
-242
lines changed

20 files changed

+342
-242
lines changed

core/engine/src/builtins/intl/segmenter/iterator.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ impl SegmentIterator {
127127
.segmenter
128128
.downcast_ref::<Segmenter>()
129129
.expect("segment iterator object should contain a segmenter");
130-
let mut segments = segmenter.native.segment(string);
130+
let mut segments = segmenter.native.segment(string.variant());
131131
// the first elem is always 0.
132132
segments.next();
133133
segments

core/engine/src/builtins/intl/segmenter/mod.rs

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,7 @@
11
use std::ops::Range;
22

3-
use boa_gc::{Finalize, Trace};
4-
use icu_collator::provider::CollationDiacriticsV1;
5-
use icu_locale::Locale;
6-
use icu_segmenter::{
7-
GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter,
8-
options::{SentenceBreakOptions, WordBreakOptions},
9-
};
10-
113
use crate::{
12-
Context, JsArgs, JsData, JsNativeError, JsResult, JsStr, JsString, JsSymbol, JsValue,
4+
Context, JsArgs, JsData, JsNativeError, JsResult, JsString, JsSymbol, JsValue,
135
builtins::{
146
BuiltInBuilder, BuiltInConstructor, BuiltInObject, IntrinsicObject,
157
options::{get_option, get_options_object},
@@ -21,6 +13,14 @@ use crate::{
2113
realm::Realm,
2214
string::StaticJsStrings,
2315
};
16+
use boa_gc::{Finalize, Trace};
17+
use boa_string::JsStrVariant;
18+
use icu_collator::provider::CollationDiacriticsV1;
19+
use icu_locale::Locale;
20+
use icu_segmenter::{
21+
GraphemeClusterSegmenter, SentenceSegmenter, WordSegmenter,
22+
options::{SentenceBreakOptions, WordBreakOptions},
23+
};
2424

2525
mod iterator;
2626
mod options;
@@ -62,9 +62,12 @@ impl NativeSegmenter {
6262

6363
/// Segment the passed string, returning an iterator with the index boundaries
6464
/// of the segments.
65-
pub(crate) fn segment<'l, 's>(&'l self, input: JsStr<'s>) -> NativeSegmentIterator<'l, 's> {
66-
match input.variant() {
67-
crate::string::JsStrVariant::Latin1(input) => match self {
65+
pub(crate) fn segment<'l, 's>(
66+
&'l self,
67+
input: JsStrVariant<'s>,
68+
) -> NativeSegmentIterator<'l, 's> {
69+
match input {
70+
JsStrVariant::Latin1(input) => match self {
6871
Self::Grapheme(g) => {
6972
NativeSegmentIterator::GraphemeLatin1(g.as_borrowed().segment_latin1(input))
7073
}
@@ -75,7 +78,7 @@ impl NativeSegmenter {
7578
NativeSegmentIterator::SentenceLatin1(s.as_borrowed().segment_latin1(input))
7679
}
7780
},
78-
crate::string::JsStrVariant::Utf16(input) => match self {
81+
JsStrVariant::Utf16(input) => match self {
7982
Self::Grapheme(g) => {
8083
NativeSegmentIterator::GraphemeUtf16(g.as_borrowed().segment_utf16(input))
8184
}

core/engine/src/builtins/intl/segmenter/segments.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ impl Segments {
8888
// 8. Let startIndex be ! FindBoundary(segmenter, string, n, before).
8989
// 9. Let endIndex be ! FindBoundary(segmenter, string, n, after).
9090
let (range, is_word_like) = {
91-
let mut segments = segmenter.native.segment(segments.string.as_str());
91+
let mut segments = segmenter.native.segment(segments.string.variant());
9292
std::iter::from_fn(|| segments.next().map(|i| (i, segments.is_word_like())))
9393
.tuple_windows()
9494
.find(|((i, _), (j, _))| (*i..*j).contains(&n))

core/engine/src/builtins/json/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ impl Json {
386386
// 7. Else if Type(space) is String, then
387387
} else if let Some(s) = space.as_string() {
388388
// a. If the length of space is 10 or less, let gap be space; otherwise let gap be the substring of space from 0 to 10.
389-
js_string!(s.get(..10).unwrap_or(s.as_str()))
389+
s.get(..10).unwrap_or(s)
390390
// 8. Else,
391391
} else {
392392
// a. Let gap be the empty String.

core/engine/src/builtins/number/globals.rs

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ pub(crate) fn parse_int(_: &JsValue, args: &[JsValue], context: &mut Context) ->
247247
// 0 digit, at the option of the implementation; and if R is not 2, 4, 8, 10, 16, or 32, then
248248
// mathInt may be an implementation-approximated value representing the integer value that is
249249
// represented by Z in radix-R notation.)
250-
let math_int = from_js_str_radix(z, r).expect("Already checked");
250+
let math_int = from_js_str_radix(z.as_str(), r).expect("Already checked");
251251

252252
// 15. If mathInt = 0, then
253253
// a. If sign = -1, return -0𝔽.
@@ -303,11 +303,6 @@ pub(crate) fn parse_float(
303303
args: &[JsValue],
304304
context: &mut Context,
305305
) -> JsResult<JsValue> {
306-
const PLUS_CHAR: u16 = b'+' as u16;
307-
const MINUS_CHAR: u16 = b'-' as u16;
308-
const LOWER_CASE_I_CHAR: u16 = b'i' as u16;
309-
const UPPER_CASE_I_CHAR: u16 = b'I' as u16;
310-
311306
let Some(string) = args.first() else {
312307
return Ok(JsValue::nan());
313308
};
@@ -333,18 +328,34 @@ pub(crate) fn parse_float(
333328
// 5. Let parsedNumber be ParseText(trimmedPrefix, StrDecimalLiteral).
334329
// 6. Assert: parsedNumber is a Parse Node.
335330
// 7. Return the StringNumericValue of parsedNumber.
336-
let (positive, prefix) = match trimmed_string.get(0) {
337-
Some(PLUS_CHAR) => (true, trimmed_string.get(1..).unwrap_or(JsStr::latin1(&[]))),
338-
Some(MINUS_CHAR) => (false, trimmed_string.get(1..).unwrap_or(JsStr::latin1(&[]))),
339-
_ => (true, trimmed_string),
331+
let (positive, prefix) = match trimmed_string
332+
.code_unit_at(0)
333+
.and_then(|c| char::from_u32(u32::from(c)))
334+
{
335+
Some('+') => (
336+
true,
337+
trimmed_string
338+
.get(1..)
339+
.unwrap_or(StaticJsStrings::EMPTY_STRING),
340+
),
341+
Some('-') => (
342+
false,
343+
trimmed_string
344+
.get(1..)
345+
.unwrap_or(StaticJsStrings::EMPTY_STRING),
346+
),
347+
_ => (true, trimmed_string.clone()),
340348
};
341349

342350
if prefix.starts_with(js_str!("Infinity")) {
343351
if positive {
344352
return Ok(JsValue::positive_infinity());
345353
}
346354
return Ok(JsValue::negative_infinity());
347-
} else if let Some(LOWER_CASE_I_CHAR | UPPER_CASE_I_CHAR) = prefix.get(0) {
355+
} else if let Some('I' | 'i') = prefix
356+
.code_unit_at(0)
357+
.and_then(|c| char::from_u32(u32::from(c)))
358+
{
348359
return Ok(JsValue::nan());
349360
}
350361

core/engine/src/builtins/regexp/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1788,7 +1788,7 @@ impl RegExp {
17881788
// 17. Return the string-concatenation of accumulatedResult and the substring of S from nextSourcePosition.
17891789
Ok(js_string!(
17901790
&JsString::from(&accumulated_result[..]),
1791-
s.get_expect(next_source_position..)
1791+
&s.get_expect(next_source_position..)
17921792
)
17931793
.into())
17941794
}

core/engine/src/builtins/string/mod.rs

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -620,11 +620,10 @@ impl String {
620620

621621
match position {
622622
// 4. Let size be the length of S.
623-
IntegerOrInfinity::Integer(i) if i >= 0 => {
623+
IntegerOrInfinity::Integer(i) if i >= 0 && i < string.len() as i64 => {
624624
// 6. Return the Number value for the numeric value of the code unit at index position within the String S.
625-
Ok(string
626-
.get(i as usize)
627-
.map_or_else(JsValue::nan, JsValue::from))
625+
// SAFETY: already validated the index.
626+
Ok(unsafe { string.code_unit_at(i as usize).unwrap_unchecked() }.into())
628627
}
629628
// 5. If position < 0 or position ≥ size, return NaN.
630629
_ => Ok(JsValue::nan()),
@@ -1043,7 +1042,7 @@ impl String {
10431042
};
10441043

10451044
// 10. Let preserved be the substring of string from 0 to position.
1046-
let preserved = JsString::from(string.get_expect(..position));
1045+
let preserved = string.get_expect(..position);
10471046

10481047
let replacement = match replace_value {
10491048
// 11. If functionalReplace is true, then
@@ -1080,7 +1079,7 @@ impl String {
10801079
Ok(js_string!(
10811080
&preserved,
10821081
&replacement,
1083-
&JsString::from(string.get_expect(position + search_length..))
1082+
&string.get_expect(position + search_length..)
10841083
)
10851084
.into())
10861085
}
@@ -1675,7 +1674,7 @@ impl String {
16751674
// 2. Return ? TrimString(S, end).
16761675
let object = this.require_object_coercible()?;
16771676
let string = object.to_string(context)?;
1678-
Ok(js_string!(string.trim_end()).into())
1677+
Ok(string.trim_end().into())
16791678
}
16801679

16811680
/// [`String.prototype.toUpperCase()`][upper] and [`String.prototype.toLowerCase()`][lower]
@@ -1957,9 +1956,8 @@ impl String {
19571956
if separator_length == 0 {
19581957
// a. Let head be the substring of S from 0 to lim.
19591958
// b. Let codeUnits be a List consisting of the sequence of code units that are the elements of head.
1960-
let head = this_str
1961-
.get(..lim)
1962-
.unwrap_or(this_str.as_str())
1959+
let head_str = this_str.get(..lim).unwrap_or(this_str);
1960+
let head = head_str
19631961
.iter()
19641962
.map(|code| js_string!(std::slice::from_ref(&code)).into());
19651963

core/engine/src/builtins/string/tests.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,7 @@ fn char_at() {
761761
#[test]
762762
fn char_code_at() {
763763
run_test_actions([
764-
TestAction::assert_eq("'abc'.charCodeAt-1", f64::NAN),
764+
TestAction::assert_eq("'abc'.charCodeAt(-1)", f64::NAN),
765765
TestAction::assert_eq("'abc'.charCodeAt(1)", 98),
766766
TestAction::assert_eq("'abc'.charCodeAt(9)", f64::NAN),
767767
TestAction::assert_eq("'abc'.charCodeAt()", 97),

core/engine/src/builtins/uri/mod.rs

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ where
309309
}
310310

311311
// b. Let C be the code unit at index k within string.
312-
let c = string.get_expect(k);
312+
let c = string.code_unit_at(k).expect("Bounds were verified");
313313

314314
// c. If C is in unescapedSet, then
315315
if unescaped_set(c) {
@@ -384,7 +384,7 @@ where
384384
}
385385

386386
// b. Let C be the code unit at index k within string.
387-
let c = string.get_expect(k);
387+
let c = string.code_point_at(k).as_u32() as u16;
388388

389389
// c. If C is not the code unit 0x0025 (PERCENT SIGN), then
390390
#[allow(clippy::if_not_else)]
@@ -406,10 +406,17 @@ where
406406
// iii. If the code units at index (k + 1) and (k + 2) within string do not represent
407407
// hexadecimal digits, throw a URIError exception.
408408
// iv. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
409-
let b = decode_hex_byte(string.get_expect(k + 1), string.get_expect(k + 2))
410-
.ok_or_else(|| {
411-
JsNativeError::uri().with_message("invalid hexadecimal digit found")
412-
})?;
409+
410+
// SAFETY: the indices have been verified as valid already.
411+
let (high, low) = unsafe {
412+
(
413+
string.code_unit_at(k + 1).unwrap_unchecked(),
414+
string.code_unit_at(k + 2).unwrap_unchecked(),
415+
)
416+
};
417+
let b = decode_hex_byte(high, low).ok_or_else(|| {
418+
JsNativeError::uri().with_message("invalid hexadecimal digit found")
419+
})?;
413420

414421
// v. Set k to k + 2.
415422
k += 2;
@@ -457,18 +464,24 @@ where
457464
k += 1;
458465

459466
// b. If the code unit at index k within string is not the code unit 0x0025 (PERCENT SIGN), throw a URIError exception.
460-
if string.get_expect(k) != 0x0025 {
467+
if string.code_unit_at(k) != Some(0x0025) {
461468
return Err(JsNativeError::uri()
462469
.with_message("escape characters must be preceded with a % sign")
463470
.into());
464471
}
465472

466473
// c. If the code units at index (k + 1) and (k + 2) within string do not represent hexadecimal digits, throw a URIError exception.
467474
// d. Let B be the 8-bit value represented by the two hexadecimal digits at index (k + 1) and (k + 2).
468-
let b = decode_hex_byte(string.get_expect(k + 1), string.get_expect(k + 2))
469-
.ok_or_else(|| {
470-
JsNativeError::uri().with_message("invalid hexadecimal digit found")
471-
})?;
475+
// SAFETY: the indices have been verified as valid already.
476+
let (high, low) = unsafe {
477+
(
478+
string.code_unit_at(k + 1).unwrap_unchecked(),
479+
string.code_unit_at(k + 2).unwrap_unchecked(),
480+
)
481+
};
482+
let b = decode_hex_byte(high, low).ok_or_else(|| {
483+
JsNativeError::uri().with_message("invalid hexadecimal digit found")
484+
})?;
472485

473486
// e. Set k to k + 2.
474487
k += 2;

core/engine/src/vm/opcode/get/property.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ fn get_by_value<const PUSH_KEY: bool>(
114114
}
115115
} else if let Some(string) = base.as_string() {
116116
let value = string
117-
.get(index.get() as usize)
117+
.code_unit_at(index.get() as usize)
118118
.map_or_else(JsValue::undefined, |char| {
119119
js_string!([char].as_slice()).into()
120120
});

0 commit comments

Comments
 (0)