@@ -3759,7 +3759,8 @@ static int RecursivelySerializeToUtf8(i::String* string,
3759
3759
int end,
3760
3760
int recursion_budget,
3761
3761
int32_t previous_character,
3762
- int32_t * last_character) {
3762
+ int32_t * last_character,
3763
+ bool replace_invalid_utf8) {
3763
3764
int utf8_bytes = 0 ;
3764
3765
while (true ) {
3765
3766
if (string->IsAsciiRepresentation ()) {
@@ -3775,7 +3776,10 @@ static int RecursivelySerializeToUtf8(i::String* string,
3775
3776
for (int i = start; i < end; i++) {
3776
3777
uint16_t character = data[i];
3777
3778
current +=
3778
- unibrow::Utf8::Encode (current, character, previous_character);
3779
+ unibrow::Utf8::Encode (current,
3780
+ character,
3781
+ previous_character,
3782
+ replace_invalid_utf8);
3779
3783
previous_character = character;
3780
3784
}
3781
3785
*last_character = previous_character;
@@ -3788,7 +3792,10 @@ static int RecursivelySerializeToUtf8(i::String* string,
3788
3792
for (int i = start; i < end; i++) {
3789
3793
uint16_t character = data[i];
3790
3794
current +=
3791
- unibrow::Utf8::Encode (current, character, previous_character);
3795
+ unibrow::Utf8::Encode (current,
3796
+ character,
3797
+ previous_character,
3798
+ replace_invalid_utf8);
3792
3799
previous_character = character;
3793
3800
}
3794
3801
*last_character = previous_character;
@@ -3824,7 +3831,8 @@ static int RecursivelySerializeToUtf8(i::String* string,
3824
3831
boundary,
3825
3832
recursion_budget - 1 ,
3826
3833
previous_character,
3827
- &previous_character);
3834
+ &previous_character,
3835
+ replace_invalid_utf8);
3828
3836
if (extra_utf8_bytes < 0 ) return extra_utf8_bytes;
3829
3837
buffer += extra_utf8_bytes;
3830
3838
utf8_bytes += extra_utf8_bytes;
@@ -3879,7 +3887,10 @@ int String::WriteUtf8(char* buffer,
3879
3887
return len;
3880
3888
}
3881
3889
3882
- if (capacity == -1 || capacity / 3 >= string_length) {
3890
+ bool replace_invalid_utf8 = (options & REPLACE_INVALID_UTF8);
3891
+ int max16BitCodeUnitSize = unibrow::Utf8::kMax16BitCodeUnitSize ;
3892
+
3893
+ if (capacity == -1 || capacity / max16BitCodeUnitSize >= string_length) {
3883
3894
int32_t previous = unibrow::Utf16::kNoPreviousCharacter ;
3884
3895
const int kMaxRecursion = 100 ;
3885
3896
int utf8_bytes =
@@ -3889,7 +3900,8 @@ int String::WriteUtf8(char* buffer,
3889
3900
string_length,
3890
3901
kMaxRecursion ,
3891
3902
previous,
3892
- &previous);
3903
+ &previous,
3904
+ replace_invalid_utf8);
3893
3905
if (utf8_bytes >= 0 ) {
3894
3906
// Success serializing with recursion.
3895
3907
if ((options & NO_NULL_TERMINATION) == 0 &&
@@ -3942,22 +3954,25 @@ int String::WriteUtf8(char* buffer,
3942
3954
char intermediate[unibrow::Utf8::kMaxEncodedSize ];
3943
3955
for (; i < len && pos < capacity; i++) {
3944
3956
i::uc32 c = write_input_buffer.GetNext ();
3945
- if (unibrow::Utf16::IsTrailSurrogate (c) &&
3946
- unibrow::Utf16::IsLeadSurrogate (previous)) {
3957
+ if (unibrow::Utf16::IsSurrogatePair (previous, c)) {
3947
3958
// We can't use the intermediate buffer here because the encoding
3948
3959
// of surrogate pairs is done under assumption that you can step
3949
3960
// back and fix the UTF8 stream. Luckily we only need space for one
3950
3961
// more byte, so there is always space.
3951
3962
ASSERT (pos < capacity);
3952
- int written = unibrow::Utf8::Encode (buffer + pos, c, previous);
3963
+ int written = unibrow::Utf8::Encode (buffer + pos,
3964
+ c,
3965
+ previous,
3966
+ replace_invalid_utf8);
3953
3967
ASSERT (written == 1 );
3954
3968
pos += written;
3955
3969
nchars++;
3956
3970
} else {
3957
3971
int written =
3958
3972
unibrow::Utf8::Encode (intermediate,
3959
3973
c,
3960
- unibrow::Utf16::kNoPreviousCharacter );
3974
+ unibrow::Utf16::kNoPreviousCharacter ,
3975
+ replace_invalid_utf8);
3961
3976
if (pos + written <= capacity) {
3962
3977
for (int j = 0 ; j < written; j++) {
3963
3978
buffer[pos + j] = intermediate[j];
0 commit comments