From 65e06d9cf3b88e9b1d75c357baf5326c28d8876c Mon Sep 17 00:00:00 2001
From: theweipeng <wangweipeng@apache.org>
Date: Sat, 21 Dec 2024 21:36:22 +0800
Subject: [PATCH 1/4] src: detect whether the string is one byte representation
 or not

References: nodejs#56090
---
 doc/api/v8.md                                 | 109 ++++++++++++++++++
 lib/v8.js                                     |  13 +++
 src/node_external_reference.h                 |   4 +
 src/node_v8.cc                                |  28 +++++
 ...st-v8-string-is-one-byte-representation.js |  37 ++++++
 5 files changed, 191 insertions(+)
 create mode 100644 test/parallel/test-v8-string-is-one-byte-representation.js

diff --git a/doc/api/v8.md b/doc/api/v8.md
index 670283e17f5d80..26b77b3adce39d 100644
--- a/doc/api/v8.md
+++ b/doc/api/v8.md
@@ -1304,6 +1304,115 @@ setTimeout(() => {
 }, 1000);
 ```
 
+## `v8.isStringOneByteRepresentation(content)`
+
+<!-- YAML
+added: REPLACEME
+-->
+
+* `content` {string}
+* Returns: {boolean}
+
+V8 only supports `Latin-1/ISO-8859-1` and `UTF16` as the underlying representations.
+If the `content` uses `Latin-1/ISO-8859-1` as the underlying representation, this function will return true;
+otherwise, it returns false.
+
+A return value of false does not necessarily mean that the string contains characters outside of `Latin-1/ISO-8859-1`.
+Sometimes a `Latin-1` string may also be represented as `UTF16`.
+
+```js
+const { isStringOneByteRepresentation } = require('node:v8');
+const assert = require('node:assert');
+
+const Encoding = {
+  latin1: 1,
+  utf16le: 2,
+};
+
+/**
+ * Read strings from the buffer.
+ *
+ * Note that this example ignores flag checks and boundary checks.
+ */
+class Deserializer {
+  buffer;
+  cursor;
+
+  constructor(buffer) {
+    this.buffer = buffer;
+    this.cursor = 0;
+  }
+
+  readString() {
+    const encoding = this.buffer.readUint8(this.cursor++);
+    const length = this.buffer.readUint32LE(this.cursor);
+    this.cursor += 4;
+    if (encoding === Encoding.latin1) {
+      const result = this.buffer.toString('latin1', this.cursor, this.cursor + length);
+      this.cursor += length;
+      return result;
+    }
+    const result = this.buffer.toString('utf16le', this.cursor, this.cursor + length);
+    this.cursor += length;
+    return result;
+  }
+}
+
+/**
+ * By means of the `isStringOneByteRepresentation` function,
+ * we can write strings into the buffer with high performance,
+ * and it only takes the time of a memcopy.
+ *
+ * Note that this example ignores boundary checks.
+ */
+class Serializer {
+  buffer;
+  cursor;
+
+  constructor() {
+    this.buffer = Buffer.alloc(100);
+    this.cursor = 0;
+  }
+
+  /**
+   * step1: Write the encoding flag to the first byte.
+   * step2: Write the byte length of the string to the next four bytes.
+   * step3: Write the string to the buffer.
+   * @param {string} input
+   */
+  writeString(input) {
+    if (isStringOneByteRepresentation(input)) {
+      this.buffer.writeUint8(Encoding.latin1, this.cursor++);
+      this.buffer.writeUint32LE(input.length, this.cursor);
+      this.cursor += 4;
+      this.buffer.write(input, this.cursor, 'latin1');
+      this.cursor += input.length;
+    } else {
+      this.buffer.writeUint8(Encoding.utf16le, this.cursor++);
+      this.buffer.writeUint32LE(input.length * 2, this.cursor);
+      this.cursor += 4;
+      this.buffer.write(input, this.cursor, 'utf16le');
+      this.cursor += input.length * 2;
+    }
+  }
+
+  finish() {
+    return this.buffer.subarray(0, this.cursor);
+  }
+}
+
+// Write strings to the buffer.
+const serializer = new Serializer();
+serializer.writeString('hello');
+serializer.writeString('你好');
+const data = serializer.finish();
+
+// Read strings from the buffer.
+const deserializer = new Deserializer(data);
+assert(deserializer.readString() === 'hello');
+assert(deserializer.readString() === '你好');
+```
+
 [HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm
 [Hook Callbacks]: #hook-callbacks
 [V8]: https://developers.google.com/v8/
diff --git a/lib/v8.js b/lib/v8.js
index 7a8979887bab49..381aabfcbfafab 100644
--- a/lib/v8.js
+++ b/lib/v8.js
@@ -104,6 +104,7 @@ const binding = internalBinding('v8');
 const {
   cachedDataVersionTag,
   setFlagsFromString: _setFlagsFromString,
+  isStringOneByteRepresentation: _isStringOneByteRepresentation,
   updateHeapStatisticsBuffer,
   updateHeapSpaceStatisticsBuffer,
   updateHeapCodeStatisticsBuffer,
@@ -155,6 +156,17 @@ function setFlagsFromString(flags) {
   _setFlagsFromString(flags);
 }
 
+/**
+ * Reports whether `content` uses a one-byte underlying representation.
+ * @param {string} content The string to inspect; validated as a string first.
+ * @returns {boolean} Result of the `_isStringOneByteRepresentation` binding.
+ */
+function isStringOneByteRepresentation(content) {
+  validateString(content, 'content');
+  return _isStringOneByteRepresentation(content);
+}
+
+
 /**
  * Gets the current V8 heap statistics.
  * @returns {{
@@ -439,4 +451,5 @@ module.exports = {
   startupSnapshot,
   setHeapSnapshotNearHeapLimit,
   GCProfiler,
+  isStringOneByteRepresentation,
 };
diff --git a/src/node_external_reference.h b/src/node_external_reference.h
index 8d49a119c21832..bb007dbdcce486 100644
--- a/src/node_external_reference.h
+++ b/src/node_external_reference.h
@@ -12,6 +12,9 @@ namespace node {
 
 using CFunctionCallbackWithOneByteString =
     uint32_t (*)(v8::Local<v8::Value>, const v8::FastOneByteString&);
+
+using CFunctionCallbackReturnBool = bool (*)(v8::Local<v8::Value> unused,
+                                             v8::Local<v8::Value> receiver);
 using CFunctionCallback = void (*)(v8::Local<v8::Value> unused,
                                    v8::Local<v8::Value> receiver);
 using CFunctionCallbackReturnDouble =
@@ -90,6 +93,7 @@ class ExternalReferenceRegistry {
 #define ALLOWED_EXTERNAL_REFERENCE_TYPES(V)                                    \
   V(CFunctionCallback)                                                         \
   V(CFunctionCallbackWithOneByteString)                                        \
+  V(CFunctionCallbackReturnBool)                                               \
   V(CFunctionCallbackReturnDouble)                                             \
   V(CFunctionCallbackReturnInt32)                                              \
   V(CFunctionCallbackValueReturnDouble)                                        \
diff --git a/src/node_v8.cc b/src/node_v8.cc
index a7f0ba7973498e..eecf09f048891d 100644
--- a/src/node_v8.cc
+++ b/src/node_v8.cc
@@ -32,6 +32,7 @@
 namespace node {
 namespace v8_utils {
 using v8::Array;
+using v8::CFunction;
 using v8::Context;
 using v8::FunctionCallbackInfo;
 using v8::FunctionTemplate;
@@ -238,6 +239,23 @@ void SetFlagsFromString(const FunctionCallbackInfo<Value>& args) {
   V8::SetFlagsFromString(*flags, static_cast<size_t>(flags.length()));
 }
 
+static void IsStringOneByteRepresentation(
+    const FunctionCallbackInfo<Value>& args) {
+  CHECK_EQ(args.Length(), 1);
+  CHECK(args[0]->IsString());
+  bool is_one_byte = args[0].As<String>()->IsOneByte();
+  args.GetReturnValue().Set(is_one_byte);
+}
+
+static bool FastIsStringOneByteRepresentation(Local<Value> receiver,
+                                              const Local<Value> target) {
+  CHECK(target->IsString());
+  return target.As<String>()->IsOneByte();
+}
+
+CFunction fast_is_string_one_byte_representation_(
+    CFunction::Make(FastIsStringOneByteRepresentation));
+
 static const char* GetGCTypeName(v8::GCType gc_type) {
   switch (gc_type) {
     case v8::GCType::kGCTypeScavenge:
@@ -479,6 +497,13 @@ void Initialize(Local<Object> target,
   // Export symbols used by v8.setFlagsFromString()
   SetMethod(context, target, "setFlagsFromString", SetFlagsFromString);
 
+  // Export symbols used by v8.isStringOneByteRepresentation()
+  SetFastMethodNoSideEffect(context,
+                            target,
+                            "isStringOneByteRepresentation",
+                            IsStringOneByteRepresentation,
+                            &fast_is_string_one_byte_representation_);
+
   // GCProfiler
   Local<FunctionTemplate> t =
       NewFunctionTemplate(env->isolate(), GCProfiler::New);
@@ -498,6 +523,9 @@ void RegisterExternalReferences(ExternalReferenceRegistry* registry) {
   registry->Register(GCProfiler::New);
   registry->Register(GCProfiler::Start);
   registry->Register(GCProfiler::Stop);
+  registry->Register(IsStringOneByteRepresentation);
+  registry->Register(FastIsStringOneByteRepresentation);
+  registry->Register(fast_is_string_one_byte_representation_.GetTypeInfo());
 }
 
 }  // namespace v8_utils
diff --git a/test/parallel/test-v8-string-is-one-byte-representation.js b/test/parallel/test-v8-string-is-one-byte-representation.js
new file mode 100644
index 00000000000000..0403299c01015f
--- /dev/null
+++ b/test/parallel/test-v8-string-is-one-byte-representation.js
@@ -0,0 +1,37 @@
+// Flags: --expose-internals
+'use strict';
+require('../common');
+const assert = require('assert');
+const { isStringOneByteRepresentation } = require('v8');
+
+[
+  undefined,
+  null,
+  false,
+  5n,
+  5,
+  Symbol(),
+  () => {},
+  {},
+].forEach((value) => {
+  assert.throws(
+    () => { isStringOneByteRepresentation(value); },
+    /The "content" argument must be of type string/
+  );
+});
+
+{
+  const latin1String = 'hello world!';
+  // Run this inside a for loop to trigger the fast API
+  for (let i = 0; i < 10_000; i++) {
+    assert.strictEqual(isStringOneByteRepresentation(latin1String), true);
+  }
+}
+
+{
+  const utf16String = '你好😀😃';
+  // Run this inside a for loop to trigger the fast API
+  for (let i = 0; i < 10_000; i++) {
+    assert.strictEqual(isStringOneByteRepresentation(utf16String), false);
+  }
+}

From 56278fe01a85023ac399a5600a79f762dbe31307 Mon Sep 17 00:00:00 2001
From: theweipeng <wangweipeng@apache.org>
Date: Mon, 30 Dec 2024 21:39:27 +0800
Subject: [PATCH 2/4] doc: optimize the document

---
 doc/api/v8.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/doc/api/v8.md b/doc/api/v8.md
index 26b77b3adce39d..97c7cad6783029 100644
--- a/doc/api/v8.md
+++ b/doc/api/v8.md
@@ -1313,7 +1313,7 @@ added: REPLACEME
 * `content` {string}
 * Returns: {boolean}
 
-V8 only supports `Latin-1/ISO-8859-1` and `UTF16` as the underlying representations.
+V8 only supports `Latin-1/ISO-8859-1` and `UTF16` as the underlying representation of a string.
 If the `content` uses `Latin-1/ISO-8859-1` as the underlying representation, this function will return true;
 otherwise, it returns false.
 
@@ -1322,7 +1322,7 @@ Sometimes a `Latin-1` string may also be represented as `UTF16`.
 
 ```js
 const { isStringOneByteRepresentation } = require('node:v8');
-const assert = require('node:assert');
+const { strictEqual } = require('node:assert');
 
 const Encoding = {
   latin1: 1,
@@ -1409,8 +1409,8 @@ const data = serializer.finish();
 
 // Read strings from the buffer.
 const deserializer = new Deserializer(data);
-assert(deserializer.readString() === 'hello');
-assert(deserializer.readString() === '你好');
+strictEqual(deserializer.readString(), 'hello');
+strictEqual(deserializer.readString(), '你好');
 ```
 
 [HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm

From 34d144f4ff5673a7710c0b4ffff27bb5ef4acfb7 Mon Sep 17 00:00:00 2001
From: theweipeng <wangweipeng@apache.org>
Date: Fri, 3 Jan 2025 20:06:13 +0800
Subject: [PATCH 3/4] doc: simplify the document

---
 doc/api/v8.md | 94 +++++++--------------------------------------------
 1 file changed, 12 insertions(+), 82 deletions(-)

diff --git a/doc/api/v8.md b/doc/api/v8.md
index 97c7cad6783029..f67e56697417c7 100644
--- a/doc/api/v8.md
+++ b/doc/api/v8.md
@@ -1322,95 +1322,25 @@ Sometimes a `Latin-1` string may also be represented as `UTF16`.
 
 ```js
 const { isStringOneByteRepresentation } = require('node:v8');
-const { strictEqual } = require('node:assert');
 
 const Encoding = {
   latin1: 1,
   utf16le: 2,
 };
-
-/**
- * Read strings from the buffer.
- *
- * Note that this example ignores flag checks and boundary checks.
- */
-class Deserializer {
-  buffer;
-  cursor;
-
-  constructor(buffer) {
-    this.buffer = buffer;
-    this.cursor = 0;
-  }
-
-  readString() {
-    const encoding = this.buffer.readUint8(this.cursor++);
-    const length = this.buffer.readUint32LE(this.cursor);
-    this.cursor += 4;
-    if (encoding === Encoding.latin1) {
-      const result = this.buffer.toString('latin1', this.cursor, this.cursor + length);
-      this.cursor += length;
-      return result;
-    }
-    const result = this.buffer.toString('utf16le', this.cursor, this.cursor + length);
-    this.cursor += length;
-    return result;
-  }
-}
-
-/**
- * By means of the `isStringOneByteRepresentation` function,
- * we can write strings into the buffer with high performance,
- * and it only takes the time of a memcopy.
- *
- * Note that this example ignores boundary checks.
- */
-class Serializer {
-  buffer;
-  cursor;
-
-  constructor() {
-    this.buffer = Buffer.alloc(100);
-    this.cursor = 0;
-  }
-
-  /**
-   * step1: Write the encoding flag to the first byte.
-   * step2: Write the byte length of the string to the next four bytes.
-   * step3: Write the string to the buffer.
-   * @param {string} input
-   */
-  writeString(input) {
-    if (isStringOneByteRepresentation(input)) {
-      this.buffer.writeUint8(Encoding.latin1, this.cursor++);
-      this.buffer.writeUint32LE(input.length, this.cursor);
-      this.cursor += 4;
-      this.buffer.write(input, this.cursor, 'latin1');
-      this.cursor += input.length;
-    } else {
-      this.buffer.writeUint8(Encoding.utf16le, this.cursor++);
-      this.buffer.writeUint32LE(input.length * 2, this.cursor);
-      this.cursor += 4;
-      this.buffer.write(input, this.cursor, 'utf16le');
-      this.cursor += input.length * 2;
-    }
-  }
-
-  finish() {
-    return this.buffer.subarray(0, this.cursor);
+const buffer = Buffer.alloc(100);
+function writeString(input) {
+  if (isStringOneByteRepresentation(input)) {
+    buffer.writeUint8(Encoding.latin1);
+    buffer.writeUint32LE(input.length, 1);
+    buffer.write(input, 5, 'latin1');
+  } else {
+    buffer.writeUint8(Encoding.utf16le);
+    buffer.writeUint32LE(input.length * 2, 1);
+    buffer.write(input, 5, 'utf16le');
   }
 }
-
-// Write strings to the buffer.
-const serializer = new Serializer();
-serializer.writeString('hello');
-serializer.writeString('你好');
-const data = serializer.finish();
-
-// Read strings from the buffer.
-const deserializer = new Deserializer(data);
-strictEqual(deserializer.readString(), 'hello');
-strictEqual(deserializer.readString(), '你好');
+writeString('hello');
+writeString('你好');
 ```
 
 [HTML structured clone algorithm]: https://developer.mozilla.org/en-US/docs/Web/API/Web_Workers_API/Structured_clone_algorithm

From bd086ce48f85f9323dc386690861e0b3a0ef457b Mon Sep 17 00:00:00 2001
From: theweipeng <wangweipeng@apache.org>
Date: Fri, 3 Jan 2025 23:47:43 +0800
Subject: [PATCH 4/4] test: remove unused flags

---
 test/parallel/test-v8-string-is-one-byte-representation.js | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/parallel/test-v8-string-is-one-byte-representation.js b/test/parallel/test-v8-string-is-one-byte-representation.js
index 0403299c01015f..ba6eee3021d869 100644
--- a/test/parallel/test-v8-string-is-one-byte-representation.js
+++ b/test/parallel/test-v8-string-is-one-byte-representation.js
@@ -1,4 +1,3 @@
-// Flags: --expose-internals
 'use strict';
 require('../common');
 const assert = require('assert');