Skip to content
86 changes: 77 additions & 9 deletions src/ir/module-splitting.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
#include "ir/module-utils.h"
#include "ir/names.h"
#include "pass.h"
#include "support/insert_ordered.h"
#include "wasm-builder.h"
#include "wasm.h"

Expand Down Expand Up @@ -291,6 +292,7 @@ struct ModuleSplitter {

// Main splitting steps
void setupJSPI();
void handleRefFuncs();
void moveSecondaryFunctions();
void thunkExportedSecondaryFunctions();
void indirectCallsToSecondaryFunctions();
Expand All @@ -308,6 +310,7 @@ struct ModuleSplitter {
if (config.jspi) {
setupJSPI();
}
handleRefFuncs();
moveSecondaryFunctions();
thunkExportedSecondaryFunctions();
indirectCallsToSecondaryFunctions();
Expand Down Expand Up @@ -342,6 +345,78 @@ std::unique_ptr<Module> ModuleSplitter::initSecondary(const Module& primary) {
return secondary;
}

void ModuleSplitter::handleRefFuncs() {
// Turn function references to functions in the other module to refer instead
// to functions in the same one, that perform a direct call to the actual
// target in the other one. After splitting, the result is that RefFuncs refer
// only to functions in the same module, and the direct calls in them are
// handled like all other cross-module calls later.
struct Gatherer : public PostWalker<Gatherer> {
ModuleSplitter& parent;

Gatherer(ModuleSplitter& parent) : parent(parent) {}

// Collect RefFuncs in a map from the function name to all RefFuncs that
// refer to it. We keep one such map for each of the primary and secondary
// modules: the primary map contains RefFuncs that are present in the
// primary module and that refer to the secondary module (hence they are in
// need of fixing), and the secondary map is the same in the other direction.
using Map = InsertOrderedMap<Name, std::vector<RefFunc*>>;
Map primaryMap, secondaryMap;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be best to handle secondary function references in the primary module and primary function references in the secondary module separately.

For primary-to-secondary references, these thunks are the best strategy, but for secondary-to-primary references, we almost don't need to do anything since the primary functions are imported into the secondary module and can be referred to directly. For the latter case, all you need to do is replace the TODO that was down in exportImportCalledPrimaryFunctions to collect referred-to primary functions as well.

For the former case, where we still need these thunks, it would be simpler to run this pass only on the primary module after moveSecondaryFunctions is called so you can assume that the parent function will always be a primary function.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, done.


void visitRefFunc(RefFunc* curr) {
  const auto target = curr->func;

  // Code that is not inside any function is treated as belonging to the
  // primary module, as globals and other things remain there.
  auto* enclosing = getFunction();
  const bool inPrimary =
    !enclosing || parent.primaryFuncs.count(enclosing->name);

  if (inPrimary) {
    // Only references that leave the primary module need fixing.
    if (parent.primaryFuncs.count(target) == 0) {
      primaryMap[target].push_back(curr);
    }
    return;
  }

  // We are in a secondary function: record the reference only if its target
  // is not itself a secondary function.
  if (parent.secondaryFuncs.count(target) == 0) {
    secondaryMap[target].push_back(curr);
  }
}
} gatherer(*this);
gatherer.walkModule(&primary);

// Fix up what we found: Generate trampolines as described earlier, and apply
// them.
// For each entry of |map|, adds a trampoline function to |module| that
// forwards all of its parameters in a direct call to the referenced function,
// and then retargets every collected RefFunc to that trampoline.
auto handleModule = [&](Module* module, const Gatherer::Map& map) {
  Builder builder(*module);
  // Generate the new trampoline function and add it to the module.
  for (const auto& [name, refFuncs] : map) {
    // Note that we hardcode |primary| here as functions have not yet moved
    // over to the secondary module.
    auto* oldFunc = primary.getFunctionOrNull(name);
    // The lookup above can return null. Fail with a clear message here rather
    // than dereferencing a null pointer below.
    assert(oldFunc && "ref.func target does not exist in the primary module");

    auto newName = Names::getValidFunctionName(
      primary, std::string("trampoline_") + name.toString());
    // The name must also be valid in the secondary module (if we are
    // processing that one, then we are adding functions to it as we go).
    newName = Names::getValidFunctionName(secondary, newName);

    // Generate the call and the function. The trampoline has the same
    // signature as the target, so each parameter is forwarded with a
    // local.get of the same index and type.
    const Index numParams = oldFunc->getNumParams();
    std::vector<Expression*> args;
    args.reserve(numParams);
    for (Index i = 0; i < numParams; i++) {
      args.push_back(builder.makeLocalGet(i, oldFunc->getLocalType(i)));
    }
    auto* call = builder.makeCall(name, args, oldFunc->getResults());

    module->addFunction(
      builder.makeFunction(newName, oldFunc->type, {}, call));

    // Update RefFuncs to refer to it.
    for (auto* refFunc : refFuncs) {
      assert(refFunc->func == name);
      refFunc->func = newName;
    }
  }
};
handleModule(&primary, gatherer.primaryMap);
handleModule(&secondary, gatherer.secondaryMap);
}

std::pair<std::set<Name>, std::set<Name>>
ModuleSplitter::classifyFunctions(const Module& primary, const Config& config) {
std::set<Name> primaryFuncs, secondaryFuncs;
Expand Down Expand Up @@ -460,7 +535,7 @@ Expression* ModuleSplitter::maybeLoadSecondary(Builder& builder,
void ModuleSplitter::indirectCallsToSecondaryFunctions() {
// Update direct calls of secondary functions to be indirect calls of their
// corresponding table indices instead.
struct CallIndirector : public WalkerPass<PostWalker<CallIndirector>> {
struct CallIndirector : public PostWalker<CallIndirector> {
ModuleSplitter& parent;
Builder builder;
CallIndirector(ModuleSplitter& parent)
Expand All @@ -482,12 +557,8 @@ void ModuleSplitter::indirectCallsToSecondaryFunctions() {
func->type,
curr->isReturn)));
}
void visitRefFunc(RefFunc* curr) {
assert(false && "TODO: handle ref.func as well");
}
};
PassRunner runner(&primary);
CallIndirector(*this).run(&runner, &primary);
CallIndirector(*this).walkModule(&primary);
}

void ModuleSplitter::exportImportCalledPrimaryFunctions() {
Expand All @@ -506,9 +577,6 @@ void ModuleSplitter::exportImportCalledPrimaryFunctions() {
calledPrimaryFuncs.push_back(curr->target);
}
}
void visitRefFunc(RefFunc* curr) {
assert(false && "TODO: handle ref.func as well");
}
};
CallCollector(primaryFuncs, calledPrimaryFuncs).walkFunction(func);
});
Expand Down
101 changes: 101 additions & 0 deletions test/lit/wasm-split/ref.func.wast
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.

;; RUN: wasm-split %s --split-funcs=second -g -o1 %t.1.wasm -o2 %t.2.wasm -all | filecheck %s
;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY
;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY

;; Test that we handle ref.func operations properly as we split out $second.
;; ref.funcs that refer to the other module must be fixed up to refer to
;; something in the same module, that then trampolines to the other.
(module
;; PRIMARY: (type $0 (func))

;; PRIMARY: (import "placeholder" "1" (func $placeholder_1))

;; PRIMARY: (global $glob1 (ref func) (ref.func $prime))

;; PRIMARY: (global $glob2 (ref func) (ref.func $2))

;; PRIMARY: (table $table 2 2 funcref)
(table $table 1 1 funcref)

(global $glob1 (ref func) (ref.func $prime))

(global $glob2 (ref func) (ref.func $second))

(elem (i32.const 0) $in-table)

;; PRIMARY: (elem $0 (i32.const 0) $in-table $placeholder_1)

;; PRIMARY: (export "prime" (func $prime))

;; PRIMARY: (export "table" (table $table))

;; PRIMARY: (export "global" (global $glob1))

;; PRIMARY: (export "global_3" (global $glob2))

;; PRIMARY: (func $prime
;; PRIMARY-NEXT: (drop
;; PRIMARY-NEXT: (ref.func $prime)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: (drop
;; PRIMARY-NEXT: (ref.func $2)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: )
(func $prime
(drop
(ref.func $prime)
)
(drop
(ref.func $second)
)
)

;; SECONDARY: (type $0 (func))

;; SECONDARY: (import "primary" "table" (table $table 2 2 funcref))

;; SECONDARY: (import "primary" "global" (global $glob1 (ref func)))

;; SECONDARY: (import "primary" "global_3" (global $glob2 (ref func)))

;; SECONDARY: (import "primary" "prime" (func $prime))

;; SECONDARY: (elem $0 (i32.const 1) $second)

;; SECONDARY: (func $0
;; SECONDARY-NEXT: (call $prime)
;; SECONDARY-NEXT: )

;; SECONDARY: (func $second
;; SECONDARY-NEXT: (drop
;; SECONDARY-NEXT: (ref.func $0)
;; SECONDARY-NEXT: )
;; SECONDARY-NEXT: (drop
;; SECONDARY-NEXT: (ref.func $second)
;; SECONDARY-NEXT: )
;; SECONDARY-NEXT: )
(func $second
(drop
(ref.func $prime)
)
(drop
(ref.func $second)
)
)

;; PRIMARY: (func $in-table
;; PRIMARY-NEXT: (nop)
;; PRIMARY-NEXT: )
(func $in-table
;; This empty function is in the table. Just being present in the table is not
;; enough of a reason for us to make a trampoline, even though in our IR the
;; table is a list of ref.funcs.
Comment on lines +89 to +91
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does the code differentiate RefFuncs in a table from other RefFuncs?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this was a part I meant to write Friday evening and somehow forgot 😄 Added now + testing.

)
)
;; PRIMARY: (func $2
;; PRIMARY-NEXT: (call_indirect (type $0)
;; PRIMARY-NEXT: (i32.const 1)
;; PRIMARY-NEXT: )
;; PRIMARY-NEXT: )