From a1670bc60377d0d5eb349b0dad2ec5d0ea56282d Mon Sep 17 00:00:00 2001 From: Yagiz Nizipli Date: Mon, 8 Apr 2024 14:05:25 -0400 Subject: [PATCH 1/5] buffer: improve `base64` and `base64url` performance Co-authored-by: Daniel Lemire --- src/string_bytes.cc | 85 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 71 insertions(+), 14 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index b3c0a90b548c70..dc528e272e8a05 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -346,17 +346,65 @@ size_t StringBytes::Write(Isolate* isolate, } case BASE64URL: - // Fall through - case BASE64: - if (str->IsExternalOneByte()) { + if (str->IsExternalOneByte()) { // 8-bit case auto ext = str->GetExternalOneByteStringResource(); - nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); + auto result = simdutf::base64_to_binary_safe( + ext->data(), ext->length(), buf, buflen, simdutf::base64_url); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = result.count; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // adapted for base64url + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); + } } else { String::Value value(isolate, str); - nbytes = base64_decode(buf, buflen, *value, value.length()); + auto result = + simdutf::base64_to_binary(reinterpret_cast(*value), + value.length(), + buf, + simdutf::base64_url); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = result.count; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // (adapted for base64url with + and / replaced by - and _). 
+ // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, *value, value.length()); + } } break; + case BASE64: { + if (str->IsExternalOneByte()) { // 8-bit case + auto ext = str->GetExternalOneByteStringResource(); + auto result = simdutf::base64_to_binary_safe( + ext->data(), ext->length(), buf, buflen); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = buflen; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); + } + } else { + String::Value value(isolate, str); + auto result = simdutf::base64_to_binary_safe( + reinterpret_cast(*value), + value.length(), + buf, + buflen); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = buflen; + } else { + // The input does not follow the WHATWG base64 specification + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, *value, value.length()); + } + } + break; + } case HEX: if (str->IsExternalOneByte()) { auto ext = str->GetExternalOneByteStringResource(); @@ -411,9 +459,12 @@ Maybe StringBytes::StorageSize(Isolate* isolate, break; case BASE64URL: - // Fall through + data_size = simdutf::base64_length_from_binary(str->Length(), + simdutf::base64_url); + break; + case BASE64: - data_size = base64_decoded_size_fast(str->Length()); + data_size = simdutf::base64_length_from_binary(str->Length()); break; case HEX: @@ -452,11 +503,15 @@ Maybe StringBytes::Size(Isolate* isolate, case UCS2: return Just(str->Length() * sizeof(uint16_t)); - case BASE64URL: - // Fall through + case BASE64URL: { + String::Value value(isolate, str); + return Just(simdutf::base64_length_from_binary(value.length(), + simdutf::base64_url)); + } + case BASE64: { String::Value value(isolate, str); - return Just(base64_decoded_size(*value, value.length())); + return 
Just(simdutf::base64_length_from_binary(value.length())); } case HEX: @@ -609,28 +664,30 @@ MaybeLocal StringBytes::Encode(Isolate* isolate, return ExternOneByteString::NewFromCopy(isolate, buf, buflen, error); case BASE64: { - size_t dlen = base64_encoded_size(buflen); + size_t dlen = simdutf::base64_length_from_binary(buflen); char* dst = node::UncheckedMalloc(dlen); if (dst == nullptr) { *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); return MaybeLocal(); } - size_t written = base64_encode(buf, buflen, dst, dlen); + size_t written = simdutf::binary_to_base64(buf, buflen, dst); CHECK_EQ(written, dlen); return ExternOneByteString::New(isolate, dst, dlen, error); } case BASE64URL: { - size_t dlen = base64_encoded_size(buflen, Base64Mode::URL); + size_t dlen = + simdutf::base64_length_from_binary(buflen, simdutf::base64_url); char* dst = node::UncheckedMalloc(dlen); if (dst == nullptr) { *error = node::ERR_MEMORY_ALLOCATION_FAILED(isolate); return MaybeLocal(); } - size_t written = base64_encode(buf, buflen, dst, dlen, Base64Mode::URL); + size_t written = + simdutf::binary_to_base64(buf, buflen, dst, simdutf::base64_url); CHECK_EQ(written, dlen); return ExternOneByteString::New(isolate, dst, dlen, error); From 8e88778a21de9b55f1287e3435877828f312a0db Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 8 Apr 2024 21:30:36 -0400 Subject: [PATCH 2/5] fix: replace base64_to_binary to account for short inputs --- src/string_bytes.cc | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index dc528e272e8a05..57702a189a4410 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -348,10 +348,11 @@ size_t StringBytes::Write(Isolate* isolate, case BASE64URL: if (str->IsExternalOneByte()) { // 8-bit case auto ext = str->GetExternalOneByteStringResource(); + size_t written_len = buflen; auto result = simdutf::base64_to_binary_safe( - ext->data(), ext->length(), buf, buflen, 
simdutf::base64_url); + ext->data(), ext->length(), buf, written_len, simdutf::base64_url); if (result.error == simdutf::error_code::SUCCESS) { - nbytes = result.count; + nbytes = written_len; } else { // The input does not follow the WHATWG forgiving-base64 specification // adapted for base64url @@ -360,13 +361,15 @@ size_t StringBytes::Write(Isolate* isolate, } } else { String::Value value(isolate, str); + size_t written_len = buflen; auto result = - simdutf::base64_to_binary(reinterpret_cast(*value), + simdutf::base64_to_binary_safe(reinterpret_cast(*value), value.length(), buf, + written_len, simdutf::base64_url); if (result.error == simdutf::error_code::SUCCESS) { - nbytes = result.count; + nbytes = written_len; } else { // The input does not follow the WHATWG forgiving-base64 specification // (adapted for base64url with + and / replaced by - and _). @@ -379,10 +382,11 @@ size_t StringBytes::Write(Isolate* isolate, case BASE64: { if (str->IsExternalOneByte()) { // 8-bit case auto ext = str->GetExternalOneByteStringResource(); + size_t written_len = buflen; auto result = simdutf::base64_to_binary_safe( - ext->data(), ext->length(), buf, buflen); + ext->data(), ext->length(), buf, written_len); if (result.error == simdutf::error_code::SUCCESS) { - nbytes = buflen; + nbytes = written_len; } else { // The input does not follow the WHATWG forgiving-base64 specification // https://infra.spec.whatwg.org/#forgiving-base64-decode @@ -390,13 +394,14 @@ size_t StringBytes::Write(Isolate* isolate, } } else { String::Value value(isolate, str); + size_t written_len = buflen; auto result = simdutf::base64_to_binary_safe( reinterpret_cast(*value), value.length(), buf, - buflen); + written_len); if (result.error == simdutf::error_code::SUCCESS) { - nbytes = buflen; + nbytes = written_len; } else { // The input does not follow the WHATWG base64 specification // https://infra.spec.whatwg.org/#forgiving-base64-decode From a2b60ac9a884db3195b759fced8472628bbd3569 Mon Sep 17 
00:00:00 2001 From: Daniel Lemire Date: Mon, 8 Apr 2024 21:34:34 -0400 Subject: [PATCH 3/5] lint --- src/string_bytes.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 57702a189a4410..c28f9ce5f10874 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -362,12 +362,12 @@ size_t StringBytes::Write(Isolate* isolate, } else { String::Value value(isolate, str); size_t written_len = buflen; - auto result = - simdutf::base64_to_binary_safe(reinterpret_cast(*value), - value.length(), - buf, - written_len, - simdutf::base64_url); + auto result = simdutf::base64_to_binary_safe( + reinterpret_cast(*value), + value.length(), + buf, + written_len, + simdutf::base64_url); if (result.error == simdutf::error_code::SUCCESS) { nbytes = written_len; } else { From edbeb503ae189bf5b04d29d617b4d9911554a78f Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 8 Apr 2024 23:28:43 -0400 Subject: [PATCH 4/5] optimization: fast one-byte case --- src/string_bytes.cc | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index c28f9ce5f10874..34fd96f3e476fa 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -359,6 +359,24 @@ size_t StringBytes::Write(Isolate* isolate, // https://infra.spec.whatwg.org/#forgiving-base64-decode nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); } + } else if(str->IsOneByte()) { + MaybeStackBuffer stack_buf(str->Length()); + str->WriteOneByte(isolate, stack_buf.out(), 0, str->Length(), String::NO_NULL_TERMINATION); + size_t written_len = buflen; + auto result = simdutf::base64_to_binary_safe( + reinterpret_cast(*stack_buf), + stack_buf.length(), + buf, + written_len, + simdutf::base64_url); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = written_len; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // (adapted for base64url 
with + and / replaced by - and _). + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, *stack_buf, stack_buf.length()); + } } else { String::Value value(isolate, str); size_t written_len = buflen; @@ -392,6 +410,23 @@ size_t StringBytes::Write(Isolate* isolate, // https://infra.spec.whatwg.org/#forgiving-base64-decode nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); } + } else if(str->IsOneByte()) { + MaybeStackBuffer stack_buf(str->Length()); + str->WriteOneByte(isolate, stack_buf.out(), 0, str->Length(), String::NO_NULL_TERMINATION); + size_t written_len = buflen; + auto result = simdutf::base64_to_binary_safe( + reinterpret_cast(*stack_buf), + stack_buf.length(), + buf, + written_len); + if (result.error == simdutf::error_code::SUCCESS) { + nbytes = written_len; + } else { + // The input does not follow the WHATWG forgiving-base64 specification + // (standard base64 alphabet, i.e. not the base64url variant). + // https://infra.spec.whatwg.org/#forgiving-base64-decode + nbytes = base64_decode(buf, buflen, *stack_buf, stack_buf.length()); + } } else { String::Value value(isolate, str); size_t written_len = buflen; From c3d6e4d2ac3f6d09f79f3c17b163a635d3d313e3 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Mon, 8 Apr 2024 23:33:10 -0400 Subject: [PATCH 5/5] lint --- src/string_bytes.cc | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 34fd96f3e476fa..c6ebcf89c4058b 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -359,9 +359,13 @@ size_t StringBytes::Write(Isolate* isolate, // https://infra.spec.whatwg.org/#forgiving-base64-decode nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); } - } else if(str->IsOneByte()) { + } else if (str->IsOneByte()) { MaybeStackBuffer stack_buf(str->Length()); - str->WriteOneByte(isolate, stack_buf.out(), 0, str->Length(), String::NO_NULL_TERMINATION); + 
str->WriteOneByte(isolate, + stack_buf.out(), + 0, + str->Length(), + String::NO_NULL_TERMINATION); size_t written_len = buflen; auto result = simdutf::base64_to_binary_safe( reinterpret_cast(*stack_buf), @@ -410,9 +414,13 @@ size_t StringBytes::Write(Isolate* isolate, // https://infra.spec.whatwg.org/#forgiving-base64-decode nbytes = base64_decode(buf, buflen, ext->data(), ext->length()); } - } else if(str->IsOneByte()) { + } else if (str->IsOneByte()) { MaybeStackBuffer stack_buf(str->Length()); - str->WriteOneByte(isolate, stack_buf.out(), 0, str->Length(), String::NO_NULL_TERMINATION); + str->WriteOneByte(isolate, + stack_buf.out(), + 0, + str->Length(), + String::NO_NULL_TERMINATION); size_t written_len = buflen; auto result = simdutf::base64_to_binary_safe( reinterpret_cast(*stack_buf),