Skip to content

Commit 3df3afc

Browse files
joyeecheungaduh95
authored andcommitted
module: detect ESM syntax by trying to recompile as SourceTextModule
Instead of using an async function wrapper, just try compiling code with unknown module format as SourceTextModule when it cannot be compiled as CJS and the error message indicates that it's worth a retry. If it can be parsed as SourceTextModule then it's considered ESM. Also, move shouldRetryAsESM() to C++ completely so that we can reuse it in the CJS module loader for require(esm). Drive-by: move methods that don't belong to ContextifyContext out as static methods and move GetHostDefinedOptions to ModuleWrap. PR-URL: #52413 Reviewed-By: Geoffrey Booth <[email protected]> Reviewed-By: Jacob Smith <[email protected]>
1 parent a56faff commit 3df3afc

File tree

7 files changed

+306
-357
lines changed

7 files changed

+306
-357
lines changed

lib/internal/modules/esm/get_format.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
114114
// but this gets called again from `defaultLoad`/`defaultLoadSync`.
115115
if (getOptionValue('--experimental-detect-module')) {
116116
const format = source ?
117-
(containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs') :
117+
(containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs') :
118118
null;
119119
if (format === 'module') {
120120
// This module has a .js extension, a package.json with no `type` field, and ESM syntax.
@@ -158,7 +158,7 @@ function getFileProtocolModuleFormat(url, context = { __proto__: null }, ignoreE
158158
if (!source) { return null; }
159159
const format = getFormatOfExtensionlessFile(url);
160160
if (format === 'module') {
161-
return containsModuleSyntax(`${source}`, fileURLToPath(url)) ? 'module' : 'commonjs';
161+
return containsModuleSyntax(`${source}`, fileURLToPath(url), url) ? 'module' : 'commonjs';
162162
}
163163
return format;
164164
}

lib/internal/modules/helpers.js

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,6 @@ const {
1919
} = require('internal/errors').codes;
2020
const { BuiltinModule } = require('internal/bootstrap/realm');
2121

22-
const {
23-
shouldRetryAsESM: contextifyShouldRetryAsESM,
24-
constants: {
25-
syntaxDetectionErrors: {
26-
esmSyntaxErrorMessages,
27-
throwsOnlyInCommonJSErrorMessages,
28-
},
29-
},
30-
} = internalBinding('contextify');
3122
const { validateString } = require('internal/validators');
3223
const fs = require('fs'); // Import all of `fs` so that it can be monkey-patched.
3324
const internalFS = require('internal/fs/utils');
@@ -329,30 +320,6 @@ function normalizeReferrerURL(referrerName) {
329320
}
330321

331322

332-
let esmSyntaxErrorMessagesSet; // Declared lazily in shouldRetryAsESM
333-
let throwsOnlyInCommonJSErrorMessagesSet; // Declared lazily in shouldRetryAsESM
334-
/**
335-
* After an attempt to parse a module as CommonJS throws an error, should we try again as ESM?
336-
* We only want to try again as ESM if the error is due to syntax that is only valid in ESM; and if the CommonJS parse
337-
* throws on an error that would not have been a syntax error in ESM (like via top-level `await` or a lexical
338-
* redeclaration of one of the CommonJS variables) then we need to parse again to see if it would have thrown in ESM.
339-
* @param {string} errorMessage The string message thrown by V8 when attempting to parse as CommonJS
340-
* @param {string} source Module contents
341-
*/
342-
function shouldRetryAsESM(errorMessage, source) {
343-
esmSyntaxErrorMessagesSet ??= new SafeSet(esmSyntaxErrorMessages);
344-
if (esmSyntaxErrorMessagesSet.has(errorMessage)) {
345-
return true;
346-
}
347-
348-
throwsOnlyInCommonJSErrorMessagesSet ??= new SafeSet(throwsOnlyInCommonJSErrorMessages);
349-
if (throwsOnlyInCommonJSErrorMessagesSet.has(errorMessage)) {
350-
return /** @type {boolean} */(contextifyShouldRetryAsESM(source));
351-
}
352-
353-
return false;
354-
}
355-
356323
/**
357324
* @param {string|undefined} url URL to convert to filename
358325
*/
@@ -382,7 +349,6 @@ module.exports = {
382349
loadBuiltinModule,
383350
makeRequireFunction,
384351
normalizeReferrerURL,
385-
shouldRetryAsESM,
386352
stripBOM,
387353
toRealPath,
388354
hasStartedUserCJSExecution() {

lib/internal/modules/run_main.js

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
'use strict';
22

33
const {
4+
ObjectGetPrototypeOf,
45
StringPrototypeEndsWith,
6+
SyntaxErrorPrototype,
57
globalThis,
68
} = primordials;
79

@@ -159,27 +161,29 @@ function runEntryPointWithESMLoader(callback) {
159161
function executeUserEntryPoint(main = process.argv[1]) {
160162
const resolvedMain = resolveMainPath(main);
161163
const useESMLoader = shouldUseESMLoader(resolvedMain);
162-
164+
let mainURL;
163165
// Unless we know we should use the ESM loader to handle the entry point per the checks in `shouldUseESMLoader`, first
164166
// try to run the entry point via the CommonJS loader; and if that fails under certain conditions, retry as ESM.
165167
let retryAsESM = false;
166168
if (!useESMLoader) {
167169
const cjsLoader = require('internal/modules/cjs/loader');
168170
const { Module } = cjsLoader;
169171
if (getOptionValue('--experimental-detect-module')) {
172+
// TODO(joyeecheung): handle this in the CJS loader. Don't try-catch here.
170173
try {
171174
// Module._load is the monkey-patchable CJS module loader.
172175
Module._load(main, null, true);
173176
} catch (error) {
174-
const source = cjsLoader.entryPointSource;
175-
const { shouldRetryAsESM } = require('internal/modules/helpers');
176-
retryAsESM = shouldRetryAsESM(error.message, source);
177-
// In case the entry point is a large file, such as a bundle,
178-
// ensure no further references can prevent it being garbage-collected.
179-
cjsLoader.entryPointSource = undefined;
177+
if (error != null && ObjectGetPrototypeOf(error) === SyntaxErrorPrototype) {
178+
const { shouldRetryAsESM } = internalBinding('contextify');
179+
const mainPath = resolvedMain || main;
180+
mainURL = pathToFileURL(mainPath).href;
181+
retryAsESM = shouldRetryAsESM(error.message, cjsLoader.entryPointSource, mainURL);
182+
// In case the entry point is a large file, such as a bundle,
183+
// ensure no further references can prevent it being garbage-collected.
184+
cjsLoader.entryPointSource = undefined;
185+
}
180186
if (!retryAsESM) {
181-
const { enrichCJSError } = require('internal/modules/esm/translators');
182-
enrichCJSError(error, source, resolvedMain);
183187
throw error;
184188
}
185189
}
@@ -190,7 +194,9 @@ function executeUserEntryPoint(main = process.argv[1]) {
190194

191195
if (useESMLoader || retryAsESM) {
192196
const mainPath = resolvedMain || main;
193-
const mainURL = pathToFileURL(mainPath).href;
197+
if (mainURL === undefined) {
198+
mainURL = pathToFileURL(mainPath).href;
199+
}
194200

195201
runEntryPointWithESMLoader((cascadedLoader) => {
196202
// Note that if the graph contains unsettled TLA, this may never resolve

src/module_wrap.cc

Lines changed: 100 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,17 @@ v8::Maybe<bool> ModuleWrap::CheckUnsettledTopLevelAwait() {
142142
return v8::Just(false);
143143
}
144144

145-
// new ModuleWrap(url, context, source, lineOffset, columnOffset, cachedData)
145+
Local<PrimitiveArray> ModuleWrap::GetHostDefinedOptions(
146+
Isolate* isolate, Local<Symbol> id_symbol) {
147+
Local<PrimitiveArray> host_defined_options =
148+
PrimitiveArray::New(isolate, HostDefinedOptions::kLength);
149+
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
150+
return host_defined_options;
151+
}
152+
153+
// new ModuleWrap(url, context, source, lineOffset, columnOffset[, cachedData]);
146154
// new ModuleWrap(url, context, source, lineOffset, columOffset,
147-
// hostDefinedOption)
155+
// idSymbol);
148156
// new ModuleWrap(url, context, exportNames, evaluationCallback[, cjsModule])
149157
void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
150158
CHECK(args.IsConstructCall());
@@ -183,9 +191,10 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
183191
// cjsModule])
184192
CHECK(args[3]->IsFunction());
185193
} else {
186-
// new ModuleWrap(url, context, source, lineOffset, columOffset, cachedData)
194+
// new ModuleWrap(url, context, source, lineOffset, columOffset[,
195+
// cachedData]);
187196
// new ModuleWrap(url, context, source, lineOffset, columOffset,
188-
// hostDefinedOption)
197+
// idSymbol);
189198
CHECK(args[2]->IsString());
190199
CHECK(args[3]->IsNumber());
191200
line_offset = args[3].As<Int32>()->Value();
@@ -199,7 +208,7 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
199208
} else {
200209
id_symbol = Symbol::New(isolate, url);
201210
}
202-
host_defined_options->Set(isolate, HostDefinedOptions::kID, id_symbol);
211+
host_defined_options = GetHostDefinedOptions(isolate, id_symbol);
203212

204213
if (that->SetPrivate(context,
205214
realm->isolate_data()->host_defined_option_symbol(),
@@ -234,50 +243,34 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
234243
module = Module::CreateSyntheticModule(
235244
isolate, url, span, SyntheticModuleEvaluationStepsCallback);
236245
} else {
237-
ScriptCompiler::CachedData* cached_data = nullptr;
238-
bool used_cache_from_user = false;
246+
// When we are compiling for the default loader, this will be
247+
// std::nullopt, and CompileSourceTextModule() should use
248+
// on-disk cache.
249+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data;
250+
if (id_symbol !=
251+
realm->isolate_data()->source_text_module_default_hdo()) {
252+
user_cached_data = nullptr;
253+
}
239254
if (args[5]->IsArrayBufferView()) {
240-
DCHECK(!can_use_builtin_cache); // We don't use this option internally.
241-
used_cache_from_user = true;
255+
CHECK(!can_use_builtin_cache); // We don't use this option internally.
242256
Local<ArrayBufferView> cached_data_buf = args[5].As<ArrayBufferView>();
243257
uint8_t* data =
244258
static_cast<uint8_t*>(cached_data_buf->Buffer()->Data());
245-
cached_data =
259+
user_cached_data =
246260
new ScriptCompiler::CachedData(data + cached_data_buf->ByteOffset(),
247261
cached_data_buf->ByteLength());
248262
}
249-
250263
Local<String> source_text = args[2].As<String>();
251-
ScriptOrigin origin(isolate,
252-
url,
253-
line_offset,
254-
column_offset,
255-
true, // is cross origin
256-
-1, // script id
257-
Local<Value>(), // source map URL
258-
false, // is opaque (?)
259-
false, // is WASM
260-
true, // is ES Module
261-
host_defined_options);
262-
263-
CompileCacheEntry* cache_entry = nullptr;
264-
if (can_use_builtin_cache && realm->env()->use_compile_cache()) {
265-
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
266-
source_text, url, CachedCodeType::kESM);
267-
}
268-
if (cache_entry != nullptr && cache_entry->cache != nullptr) {
269-
// source will take ownership of cached_data.
270-
cached_data = cache_entry->CopyCache();
271-
}
272264

273-
ScriptCompiler::Source source(source_text, origin, cached_data);
274-
ScriptCompiler::CompileOptions options;
275-
if (source.GetCachedData() == nullptr) {
276-
options = ScriptCompiler::kNoCompileOptions;
277-
} else {
278-
options = ScriptCompiler::kConsumeCodeCache;
279-
}
280-
if (!ScriptCompiler::CompileModule(isolate, &source, options)
265+
bool cache_rejected = false;
266+
if (!CompileSourceTextModule(realm,
267+
source_text,
268+
url,
269+
line_offset,
270+
column_offset,
271+
host_defined_options,
272+
user_cached_data,
273+
&cache_rejected)
281274
.ToLocal(&module)) {
282275
if (try_catch.HasCaught() && !try_catch.HasTerminated()) {
283276
CHECK(!try_catch.Message().IsEmpty());
@@ -291,18 +284,8 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
291284
return;
292285
}
293286

294-
bool cache_rejected = false;
295-
if (options == ScriptCompiler::kConsumeCodeCache) {
296-
cache_rejected = source.GetCachedData()->rejected;
297-
}
298-
299-
if (cache_entry != nullptr) {
300-
realm->env()->compile_cache_handler()->MaybeSave(
301-
cache_entry, module, cache_rejected);
302-
}
303-
304-
// If the cache comes from builtin compile cache, fail silently.
305-
if (cache_rejected && used_cache_from_user) {
287+
if (user_cached_data.has_value() && user_cached_data.value() != nullptr &&
288+
cache_rejected) {
306289
THROW_ERR_VM_MODULE_CACHED_DATA_REJECTED(
307290
realm, "cachedData buffer was rejected");
308291
try_catch.ReThrow();
@@ -345,6 +328,71 @@ void ModuleWrap::New(const FunctionCallbackInfo<Value>& args) {
345328
args.GetReturnValue().Set(that);
346329
}
347330

331+
MaybeLocal<Module> ModuleWrap::CompileSourceTextModule(
332+
Realm* realm,
333+
Local<String> source_text,
334+
Local<String> url,
335+
int line_offset,
336+
int column_offset,
337+
Local<PrimitiveArray> host_defined_options,
338+
std::optional<ScriptCompiler::CachedData*> user_cached_data,
339+
bool* cache_rejected) {
340+
Isolate* isolate = realm->isolate();
341+
EscapableHandleScope scope(isolate);
342+
ScriptOrigin origin(isolate,
343+
url,
344+
line_offset,
345+
column_offset,
346+
true, // is cross origin
347+
-1, // script id
348+
Local<Value>(), // source map URL
349+
false, // is opaque (?)
350+
false, // is WASM
351+
true, // is ES Module
352+
host_defined_options);
353+
ScriptCompiler::CachedData* cached_data = nullptr;
354+
CompileCacheEntry* cache_entry = nullptr;
355+
// When compiling for the default loader, user_cached_data is std::nullptr.
356+
// When compiling for vm.Module, it's either nullptr or a pointer to the
357+
// cached data.
358+
if (user_cached_data.has_value()) {
359+
cached_data = user_cached_data.value();
360+
} else if (realm->env()->use_compile_cache()) {
361+
cache_entry = realm->env()->compile_cache_handler()->GetOrInsert(
362+
source_text, url, CachedCodeType::kESM);
363+
}
364+
365+
if (cache_entry != nullptr && cache_entry->cache != nullptr) {
366+
// source will take ownership of cached_data.
367+
cached_data = cache_entry->CopyCache();
368+
}
369+
370+
ScriptCompiler::Source source(source_text, origin, cached_data);
371+
ScriptCompiler::CompileOptions options;
372+
if (cached_data == nullptr) {
373+
options = ScriptCompiler::kNoCompileOptions;
374+
} else {
375+
options = ScriptCompiler::kConsumeCodeCache;
376+
}
377+
378+
Local<Module> module;
379+
if (!ScriptCompiler::CompileModule(isolate, &source, options)
380+
.ToLocal(&module)) {
381+
return scope.EscapeMaybe(MaybeLocal<Module>());
382+
}
383+
384+
if (options == ScriptCompiler::kConsumeCodeCache) {
385+
*cache_rejected = source.GetCachedData()->rejected;
386+
}
387+
388+
if (cache_entry != nullptr) {
389+
realm->env()->compile_cache_handler()->MaybeSave(
390+
cache_entry, module, *cache_rejected);
391+
}
392+
393+
return scope.Escape(module);
394+
}
395+
348396
static Local<Object> createImportAttributesContainer(
349397
Realm* realm,
350398
Isolate* isolate,

src/module_wrap.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33

44
#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS
55

6-
#include <unordered_map>
6+
#include <optional>
77
#include <string>
8+
#include <unordered_map>
89
#include <vector>
910
#include "base_object.h"
11+
#include "v8-script.h"
1012

1113
namespace node {
1214

@@ -69,6 +71,23 @@ class ModuleWrap : public BaseObject {
6971
return true;
7072
}
7173

74+
static v8::Local<v8::PrimitiveArray> GetHostDefinedOptions(
75+
v8::Isolate* isolate, v8::Local<v8::Symbol> symbol);
76+
77+
// When user_cached_data is not std::nullopt, use the code cache if it's not
78+
// nullptr, otherwise don't use code cache.
79+
// TODO(joyeecheung): when it is std::nullopt, use on-disk cache
80+
// See: https://github.com/nodejs/node/issues/47472
81+
static v8::MaybeLocal<v8::Module> CompileSourceTextModule(
82+
Realm* realm,
83+
v8::Local<v8::String> source_text,
84+
v8::Local<v8::String> url,
85+
int line_offset,
86+
int column_offset,
87+
v8::Local<v8::PrimitiveArray> host_defined_options,
88+
std::optional<v8::ScriptCompiler::CachedData*> user_cached_data,
89+
bool* cache_rejected);
90+
7291
private:
7392
ModuleWrap(Realm* realm,
7493
v8::Local<v8::Object> object,

0 commit comments

Comments
 (0)