Skip to content

Commit b12ba58

Browse files
author
Mert Can Altin
committed
util: add fast path for Latin1 decoding
1 parent bbdfeeb commit b12ba58

File tree

4 files changed

+121
-0
lines changed

4 files changed

+121
-0
lines changed

lib/internal/encoding.js

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ const {
5555
encodeIntoResults,
5656
encodeUtf8String,
5757
decodeUTF8,
58+
decodeLatin1,
5859
} = binding;
5960

6061
const { Buffer } = require('buffer');
@@ -443,6 +444,10 @@ function makeTextDecoderICU() {
443444
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
444445
}
445446

447+
if (this[kEncoding] === 'windows-1252') {
448+
return decodeLatin1(input);
449+
}
450+
446451
this.#prepareConverter();
447452

448453
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);

src/encoding_binding.cc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
33
#include "env-inl.h"
4+
#include "node_buffer.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
67
#include "simdutf.h"
@@ -226,6 +227,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
226227
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
227228
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
228229
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
230+
SetMethodNoSideEffect(isolate, target, "decodeLatin1", DecodeLatin1);
229231
}
230232

231233
void BindingData::CreatePerContextProperties(Local<Object> target,
@@ -243,6 +245,44 @@ void BindingData::RegisterTimerExternalReferences(
243245
registry->Register(DecodeUTF8);
244246
registry->Register(ToASCII);
245247
registry->Register(ToUnicode);
248+
registry->Register(DecodeLatin1);
249+
}
250+
251+
void BindingData::DecodeLatin1(const FunctionCallbackInfo<Value>& args) {
252+
Environment* env = Environment::GetCurrent(args);
253+
254+
CHECK_GE(args.Length(), 1);
255+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
256+
args[0]->IsArrayBufferView())) {
257+
return node::THROW_ERR_INVALID_ARG_TYPE(
258+
env->isolate(),
259+
"The \"input\" argument must be an instance of ArrayBuffer, "
260+
"SharedArrayBuffer, or ArrayBufferView.");
261+
}
262+
263+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
264+
const uint8_t* data = buffer.data();
265+
size_t length = buffer.length();
266+
267+
if (length == 0) {
268+
return args.GetReturnValue().SetEmptyString();
269+
}
270+
271+
std::string result(length * 2, '\0');
272+
273+
size_t written = simdutf::convert_latin1_to_utf8(
274+
reinterpret_cast<const char*>(data), length, &result[0]);
275+
276+
if (written == 0) {
277+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
278+
env->isolate(), "The encoded data was not valid for encoding latin1");
279+
}
280+
281+
result.resize(written);
282+
283+
Local<Object> buffer_result =
284+
node::Buffer::Copy(env, result.c_str(), result.length()).ToLocalChecked();
285+
args.GetReturnValue().Set(buffer_result);
246286
}
247287

248288
} // namespace encoding_binding

src/encoding_binding.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ class BindingData : public SnapshotableObject {
3131
static void EncodeInto(const v8::FunctionCallbackInfo<v8::Value>& args);
3232
static void EncodeUtf8String(const v8::FunctionCallbackInfo<v8::Value>& args);
3333
static void DecodeUTF8(const v8::FunctionCallbackInfo<v8::Value>& args);
34+
static void DecodeLatin1(const v8::FunctionCallbackInfo<v8::Value>& args);
3435

3536
static void ToASCII(const v8::FunctionCallbackInfo<v8::Value>& args);
3637
static void ToUnicode(const v8::FunctionCallbackInfo<v8::Value>& args);

test/cctest/test_encoding_binding.cc

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#include "encoding_binding.h"
2+
#include "env-inl.h"
3+
#include "gtest/gtest.h"
4+
#include "node_test_fixture.h"
5+
#include "v8.h"
6+
7+
namespace node {
8+
namespace encoding_binding {
9+
10+
// Invokes BindingData::DecodeLatin1 the way JavaScript would: the callback is
// wrapped in a v8::Function and called with the first element of `args`.
//
// env:    environment whose isolate and context the call runs in.
// args:   argument array; exactly one argument (args[0]) is passed on.
// result: receives the callback's return value when the call succeeds.
// Returns true when the call completed without throwing, false otherwise
// (in which case `*result` is left untouched).
//
// A FunctionCallbackInfo cannot be built from a plain array of handles, so
// the callback has to be reached through a real function call.
bool RunDecodeLatin1(Environment* env,
                     Local<Value> args[],
                     Local<Value>* result) {
  Isolate* isolate = env->isolate();
  v8::Local<v8::Context> context = env->context();
  TryCatch try_catch(isolate);

  v8::Local<v8::Function> decode;
  if (!v8::FunctionTemplate::New(isolate, BindingData::DecodeLatin1)
           ->GetFunction(context)
           .ToLocal(&decode)) {
    return false;
  }

  // Every caller supplies a single-element argument array.
  constexpr int kArgc = 1;
  Local<Value> returned;
  if (!decode->Call(context, v8::Undefined(isolate), kArgc, args)
           .ToLocal(&returned) ||
      try_catch.HasCaught()) {
    return false;
  }

  *result = returned;
  return true;
}
25+
26+
// Test fixture for the DecodeLatin1 binding tests below; it adds no members
// of its own on top of NodeTestFixture.
class EncodingBindingTest : public NodeTestFixture {};
27+
28+
// Feeds three non-ASCII Latin-1 bytes through DecodeLatin1 and checks the
// text that comes back.
TEST_F(EncodingBindingTest, DecodeLatin1_ValidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  // 0xC1, 0xE9 and 0xF3 are the bytes compared against "Áéó" below.
  const uint8_t kInput[] = {0xC1, 0xE9, 0xF3};
  constexpr size_t kInputSize = sizeof(kInput);

  Local<ArrayBuffer> backing = ArrayBuffer::New(isolate, kInputSize);
  memcpy(backing->GetBackingStore()->Data(), kInput, kInputSize);

  Local<Value> args[] = {Uint8Array::New(backing, 0, kInputSize)};
  Local<Value> decoded;
  EXPECT_TRUE(RunDecodeLatin1(env, args, &decoded));

  String::Utf8Value text(isolate, decoded);
  EXPECT_STREQ(*text, "Áéó");
}
46+
47+
// A zero-length view must decode successfully to the empty string.
TEST_F(EncodingBindingTest, DecodeLatin1_EmptyInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<ArrayBuffer> backing = ArrayBuffer::New(isolate, 0);
  Local<Value> args[] = {Uint8Array::New(backing, 0, 0)};

  Local<Value> decoded;
  EXPECT_TRUE(RunDecodeLatin1(env, args, &decoded));

  String::Utf8Value text(isolate, decoded);
  EXPECT_STREQ(*text, "");
}
62+
63+
// Passing a string instead of a buffer type must make the call fail.
TEST_F(EncodingBindingTest, DecodeLatin1_InvalidInput) {
  Environment* env = CreateEnvironment();
  Isolate* isolate = env->isolate();
  HandleScope handle_scope(isolate);

  Local<Value> not_a_buffer = String::NewFromUtf8Literal(isolate, "Invalid input");
  Local<Value> args[] = {not_a_buffer};

  Local<Value> decoded;
  EXPECT_FALSE(RunDecodeLatin1(env, args, &decoded));
}
73+
74+
} // namespace encoding_binding
75+
} // namespace node

0 commit comments

Comments
 (0)