WebAssembly · kripken · May 8, 2024 · Apr 18, 2024 · Apr 18, 2024 · May 6, 2024
@@ -74,6 +74,7 @@
 #include "ir/module-utils.h"
 #include "ir/names.h"
 #include "pass.h"
+#include "support/insert_ordered.h"
 #include "wasm-builder.h"
 #include "wasm.h"
 
@@ -291,6 +292,7 @@ struct ModuleSplitter {
 
   // Main splitting steps
   void setupJSPI();
+  void handleRefFuncs();
   void moveSecondaryFunctions();
   void thunkExportedSecondaryFunctions();
   void indirectCallsToSecondaryFunctions();
@@ -308,6 +310,7 @@ struct ModuleSplitter {
     if (config.jspi) {
       setupJSPI();
     }
+    handleRefFuncs();
     moveSecondaryFunctions();
     thunkExportedSecondaryFunctions();
     indirectCallsToSecondaryFunctions();
@@ -342,6 +345,78 @@ std::unique_ptr<Module> ModuleSplitter::initSecondary(const Module& primary) {
   return secondary;
 }
 
+void ModuleSplitter::handleRefFuncs() {
+  // Make every ref.func refer to a function in its own module. A ref.func whose
+  // target is in the other module is redirected to a new trampoline function in
+  // the same module, which performs a direct call to the actual target. Those
+  // direct calls are then handled like all other cross-module calls later.
+  // This must run before functions are moved, as it reads them from |primary|.
+  struct Gatherer : public PostWalker<Gatherer> {
+    ModuleSplitter& parent;
+
+    Gatherer(ModuleSplitter& parent) : parent(parent) {}
+
+    // Maps from a function name to all the RefFuncs that refer to it and are
+    // in need of fixing. |primaryMap| has RefFuncs that are present in the
+    // primary module but do not refer to a primary function; |secondaryMap|
+    // has RefFuncs inside non-primary functions that do not refer to a
+    // secondary function.
+    using Map = InsertOrderedMap<Name, std::vector<RefFunc*>>;
+    Map primaryMap, secondaryMap;
+
+    void visitRefFunc(RefFunc* curr) {
+      // If we are not in a function then we consider this the primary module,
+      // as globals and other things remain there.
+      auto* func = getFunction();
+      if (!func || parent.primaryFuncs.count(func->name)) {
+        if (!parent.primaryFuncs.count(curr->func)) {
+          primaryMap[curr->func].push_back(curr);
+        }
+      } else {
+        if (!parent.secondaryFuncs.count(curr->func)) {
+          secondaryMap[curr->func].push_back(curr);
+        }
+      }
+    }
+  } gatherer(*this);
+  gatherer.walkModule(&primary);
+
+  // Fix up what we found: for each referenced function, add one trampoline to
+  // |module| and point all the gathered RefFuncs at it.
+  auto handleModule = [&](Module* module, const Gatherer::Map& map) {
+    Builder builder(*module);
+    for (const auto& [name, refFuncs] : map) {
+      // Note that we hardcode |primary| here as functions have not yet moved
+      // over to the secondary module. getFunction() (not getFunctionOrNull())
+      // is used as we dereference the result right away, so it must exist.
+      auto* oldFunc = primary.getFunction(name);
+      auto newName = Names::getValidFunctionName(
+        primary, std::string("trampoline_") + name.toString());
+      // The name must also be valid in the secondary module (if we are
+      // processing that one, then we are adding functions to it as we go).
+      newName = Names::getValidFunctionName(secondary, newName);
+
+      // The trampoline forwards all of its parameters to the real target.
+      std::vector<Expression*> args;
+      for (Index i = 0; i < oldFunc->getNumParams(); i++) {
+        args.push_back(builder.makeLocalGet(i, oldFunc->getLocalType(i)));
+      }
+      auto* call = builder.makeCall(name, args, oldFunc->getResults());
+
+      module->addFunction(
+        builder.makeFunction(newName, oldFunc->type, {}, call));
+
+      // Update RefFuncs to refer to it.
+      for (auto* refFunc : refFuncs) {
+        assert(refFunc->func == name);
+        refFunc->func = newName;
+      }
+    }
+  };
+  handleModule(&primary, gatherer.primaryMap);
+  handleModule(&secondary, gatherer.secondaryMap);
+}
+
 std::pair<std::set<Name>, std::set<Name>>
 ModuleSplitter::classifyFunctions(const Module& primary, const Config& config) {
   std::set<Name> primaryFuncs, secondaryFuncs;
@@ -460,7 +535,7 @@ Expression* ModuleSplitter::maybeLoadSecondary(Builder& builder,
 void ModuleSplitter::indirectCallsToSecondaryFunctions() {
   // Update direct calls of secondary functions to be indirect calls of their
   // corresponding table indices instead.
-  struct CallIndirector : public WalkerPass<PostWalker<CallIndirector>> {
+  struct CallIndirector : public PostWalker<CallIndirector> {
     ModuleSplitter& parent;
     Builder builder;
     CallIndirector(ModuleSplitter& parent)
@@ -482,12 +557,8 @@ void ModuleSplitter::indirectCallsToSecondaryFunctions() {
                                  func->type,
                                  curr->isReturn)));
     }
-    void visitRefFunc(RefFunc* curr) {
-      assert(false && "TODO: handle ref.func as well");
-    }
   };
-  PassRunner runner(&primary);
-  CallIndirector(*this).run(&runner, &primary);
+  CallIndirector(*this).walkModule(&primary);
 }
 
 void ModuleSplitter::exportImportCalledPrimaryFunctions() {
@@ -506,9 +577,6 @@ void ModuleSplitter::exportImportCalledPrimaryFunctions() {
             calledPrimaryFuncs.push_back(curr->target);
           }
         }
-        void visitRefFunc(RefFunc* curr) {
-          assert(false && "TODO: handle ref.func as well");
-        }
       };
       CallCollector(primaryFuncs, calledPrimaryFuncs).walkFunction(func);
     });

@@ -0,0 +1,101 @@
+;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
+
+;; RUN: wasm-split %s --split-funcs=second -g -o1 %t.1.wasm -o2 %t.2.wasm -all | filecheck %s
+;; RUN: wasm-dis %t.1.wasm | filecheck %s --check-prefix PRIMARY
+;; RUN: wasm-dis %t.2.wasm | filecheck %s --check-prefix SECONDARY
+
+;; Test that we handle ref.func operations properly as we split out $second.
+;; ref.funcs that refer to the other module must be fixed up to refer to
+;; something in the same module, that then trampolines to the other.
+(module
+ ;; PRIMARY:      (type $0 (func))
+
+ ;; PRIMARY:      (import "placeholder" "1" (func $placeholder_1))
+
+ ;; PRIMARY:      (global $glob1 (ref func) (ref.func $prime))
+
+ ;; PRIMARY:      (global $glob2 (ref func) (ref.func $2))
+
+ ;; PRIMARY:      (table $table 2 2 funcref)
+ (table $table 1 1 funcref)
+
+ (global $glob1 (ref func) (ref.func $prime))
+
+ (global $glob2 (ref func) (ref.func $second))
+
+ (elem (i32.const 0) $in-table)
+
+ ;; PRIMARY:      (elem $0 (i32.const 0) $in-table $placeholder_1)
+
+ ;; PRIMARY:      (export "prime" (func $prime))
+
+ ;; PRIMARY:      (export "table" (table $table))
+
+ ;; PRIMARY:      (export "global" (global $glob1))
+
+ ;; PRIMARY:      (export "global_3" (global $glob2))
+
+ ;; PRIMARY:      (func $prime
+ ;; PRIMARY-NEXT:  (drop
+ ;; PRIMARY-NEXT:   (ref.func $prime)
+ ;; PRIMARY-NEXT:  )
+ ;; PRIMARY-NEXT:  (drop
+ ;; PRIMARY-NEXT:   (ref.func $2)
+ ;; PRIMARY-NEXT:  )
+ ;; PRIMARY-NEXT: )
+ (func $prime
+  (drop
+   (ref.func $prime)
+  )
+  (drop
+   (ref.func $second)
+  )
+ )
+
+ ;; SECONDARY:      (type $0 (func))
+
+ ;; SECONDARY:      (import "primary" "table" (table $table 2 2 funcref))
+
+ ;; SECONDARY:      (import "primary" "global" (global $glob1 (ref func)))
+
+ ;; SECONDARY:      (import "primary" "global_3" (global $glob2 (ref func)))
+
+ ;; SECONDARY:      (import "primary" "prime" (func $prime))
+
+ ;; SECONDARY:      (elem $0 (i32.const 1) $second)
+
+ ;; SECONDARY:      (func $0
+ ;; SECONDARY-NEXT:  (call $prime)
+ ;; SECONDARY-NEXT: )
+
+ ;; SECONDARY:      (func $second
+ ;; SECONDARY-NEXT:  (drop
+ ;; SECONDARY-NEXT:   (ref.func $0)
+ ;; SECONDARY-NEXT:  )
+ ;; SECONDARY-NEXT:  (drop
+ ;; SECONDARY-NEXT:   (ref.func $second)
+ ;; SECONDARY-NEXT:  )
+ ;; SECONDARY-NEXT: )
+ (func $second
+  (drop
+   (ref.func $prime)
+  )
+  (drop
+   (ref.func $second)
+  )
+ )
+
+ ;; PRIMARY:      (func $in-table
+ ;; PRIMARY-NEXT:  (nop)
+ ;; PRIMARY-NEXT: )
+ (func $in-table
+  ;; This empty function is referenced only from the table, and it stays in the
+  ;; primary module. Being in the table alone must not create a trampoline,
+  ;; even though in our IR the table's contents are a list of ref.funcs.
+ )
+)
+;; PRIMARY:      (func $2
+;; PRIMARY-NEXT:  (call_indirect (type $0)
+;; PRIMARY-NEXT:   (i32.const 1)
+;; PRIMARY-NEXT:  )
+;; PRIMARY-NEXT: )