WebAssembly · kripken · Aug 24, 2021 · Aug 13, 2021 · Aug 13, 2021 · Aug 13, 2021
@@ -22,7 +22,10 @@
 namespace wasm {
 
 // Make an expression available in a local. If already in one, just
-// use that local, otherwise use a new local
+// use that local, otherwise use a new local.
+//
+// Note that if the local is reused, this assumes it is not modified in between
+// the set and the get, which the caller must ensure.
 
 struct Localizer {
   Index index;

@@ -0,0 +1,59 @@
+/*
+ * Copyright 2021 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef wasm_ir_reorderer_h
+#define wasm_ir_reorderer_h
+
+#include <ir/effects.h>
+#include <wasm-builder.h>
+
+namespace wasm {
+
+//
+// Given two expressions that appear in a specific order - first and then
+// second - this helper can create a sequence in which we return the value of
+// the first. If the two expressions can be reordered, this simply returns
+//
+//   (second, first)
+//
+// If side effects prevent that, it will use a local to save the value of the
+// first, and return it at the end,
+//
+//   (temp = first, second, temp)
+//
+Expression* getResultOfFirst(Expression* first,
+                             Expression* second,
+                             Function* func,
+                             Module* wasm,
+                             const PassOptions& passOptions) {
+  assert(first->type.isConcrete());
+
+  Builder builder(*wasm);
+
+  if (EffectAnalyzer::canReorder(passOptions, wasm->features, first, second)) {
+    return builder.makeSequence(second, first);
+  }
+
+  auto type = first->type;
+  auto index = Builder::addVar(func, type);
+  return builder.makeBlock({builder.makeLocalSet(index, first),
+                            second,
+                            builder.makeLocalGet(index, type)});
+}
+
+} // namespace wasm
+
+#endif // wasm_ir_reorderer_h
@@ -33,6 +33,7 @@
 #include <ir/load-utils.h>
 #include <ir/manipulation.h>
 #include <ir/match.h>
+#include <ir/ordering.h>
 #include <ir/properties.h>
 #include <ir/type-updating.h>
 #include <ir/utils.h>
@@ -212,20 +213,35 @@ struct OptimizeInstructions
 
   bool fastMath;
 
+  bool refinalize = false;
+
   void doWalkFunction(Function* func) {
     fastMath = getPassOptions().fastMath;
-    // first, scan locals
+
+    // First, scan locals.
     {
       LocalScanner scanner(localInfo, getPassOptions());
       scanner.setModule(getModule());
       scanner.walkFunction(func);
     }
-    // main walk
+
+    // Main walk.
     super::doWalkFunction(func);
+
+    // If we need to update parent types, do so.
+    if (refinalize) {
+      ReFinalize().walkFunctionInModule(func, getModule());
+    }
+
+    // Final optimizations.
     {
       FinalOptimizer optimizer(getPassOptions());
       optimizer.walkFunction(func);
     }
+
+    // Some patterns create locals (like when we use getResultOfFirst), which we
+    // may need to fix up.
+    TypeUpdating::handleNonDefaultableLocals(func, *getModule());
   }
 
   // Set to true when one of the visitors makes a change (either replacing the
@@ -1263,41 +1279,82 @@ struct OptimizeInstructions
     skipNonNullCast(curr->srcRef);
   }
 
+  bool canBeCastTo(HeapType a, HeapType b) {
+    return HeapType::isSubType(a, b) || HeapType::isSubType(b, a);
+  }
+
   void visitRefCast(RefCast* curr) {
     if (curr->type == Type::unreachable) {
       return;
     }
 
+    Builder builder(*getModule());
     auto passOptions = getPassOptions();
 
+    auto fallthrough = Properties::getFallthrough(
+      curr->ref, getPassOptions(), getModule()->features);
+
+    // If the value is a null, it will just flow through, and we do not need the
+    // cast. However, if that would change the type, then things are less
+    // simple: if the original type was non-nullable, replacing it with a null
+    // would change the type, which can happen in e.g.
+    //   (ref.cast (ref.as_non_null (.. (ref.null)
+    if (fallthrough->is<RefNull>()) {
+      // Replace the expression with drops of the inputs, and a null. Note that
+      // we provide a null of the type the outside expects - that of the rtt,
+      // which is what was cast to.
+      Expression* rep =
+        builder.makeBlock({builder.makeDrop(curr->ref),
+                           builder.makeDrop(curr->rtt),
+                           builder.makeRefNull(curr->rtt->type.getHeapType())});
+      if (curr->ref->type.isNonNullable()) {
+        // Avoid a type change by forcing to be non-nullable. In practice, this
+        // would have trapped before we get here, so this is just for
+        // validation.
+        rep = builder.makeRefAs(RefAsNonNull, rep);
+      }
+      replaceCurrent(rep);
+      return;
+      // TODO: The optimal ordering of this and the other ref.as_non_null stuff
+      //       later down in this functions is unclear and may be worth looking
+      //       into.
+    }
+
+    // For the cast to be able to succeed, the value being cast must be a
+    // subtype of the desired type, as RTT subtyping is a subset of static
+    // subtyping. For example, trying to cast an array to a struct would be
+    // incompatible.
+    if (!canBeCastTo(curr->ref->type.getHeapType(),
+                     curr->rtt->type.getHeapType())) {
+      // This cast cannot succeed. If the input is not a null, it will
+      // definitely trap.
+      if (fallthrough->type.isNonNullable()) {
+        // Our type will now be unreachable; update the parents.
+        refinalize = true;
+        replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref),
+                                          builder.makeDrop(curr->rtt),
+                                          builder.makeUnreachable()}));
+        return;
+      }
+      // Otherwise, we are not sure what it is, and need to wait for runtime to
+      // see if it is a null or not. (We've already handled the case where we
+      // can see the value is definitely a null at compile time, earlier.)
+    }
+
     if (passOptions.ignoreImplicitTraps || passOptions.trapsNeverHappen) {
-      // A ref.cast traps when the RTTs do not line up, which can be of one of
-      // two sorts of issues:
-      //  1. The value being cast is not even a subtype of the cast type. In
-      //     that case the RTTs trivially cannot indicate subtyping, because
-      //     RTT subtyping is a subset of static subtyping. For example, maybe
-      //     we are trying to cast a {i32} struct to an [f64] array.
-      //  2. The value is a subtype of the cast type, but the RTTs still do not
-      //     fit. That indicates a difference between RTT subtyping and static
-      //     subtyping. That is, the type may be right but the chain of rtt.subs
-      //     is not.
-      // If we ignore a possible trap then we would like to assume that neither
-      // of those two situations can happen. However, we still cannot do
-      // anything if point 1 is a problem, that is, if the value is not a
-      // subtype of the cast type, as we can't remove the cast in that case -
-      // the wasm would not validate. But if the type *is* a subtype, then we
-      // can ignore a possible trap on 2 and remove it.
-      //
-      // We also do not do this if the arguments cannot be reordered. If we
-      // can't do that then we need to add a drop, at minimum (which may still
-      // be worthwhile, but depends on other optimizations kicking in, so it's
-      // not clearly worthwhile).
+      // Aside from the issue of type incompatibility as mentioned above, the
+      // cast can trap if the types *are* compatible but it happens to be the
+      // case at runtime that the value is not of the desired subtype. If we
+      // do not consider such traps possible, we can ignore that. Note, though,
+      // that we cannot do this if we cannot replace the current type with the
+      // reference's type.
       if (HeapType::isSubType(curr->ref->type.getHeapType(),
-                              curr->rtt->type.getHeapType()) &&
-          canReorder(curr->ref, curr->rtt)) {
-        Builder builder(*getModule());
-        replaceCurrent(
-          builder.makeSequence(builder.makeDrop(curr->rtt), curr->ref));
+                              curr->rtt->type.getHeapType())) {
+        replaceCurrent(getResultOfFirst(curr->ref,
+                                        builder.makeDrop(curr->rtt),
+                                        getFunction(),
+                                        getModule(),
+                                        passOptions));
         return;
       }
     }
@@ -1358,6 +1415,18 @@ struct OptimizeInstructions
     }
   }
 
+  void visitRefTest(RefTest* curr) {
+    // See above in RefCast.
+    if (!canBeCastTo(curr->ref->type.getHeapType(),
+                     curr->rtt->type.getHeapType())) {
+      // This test cannot succeed, and will definitely return 0.
+      Builder builder(*getModule());
+      replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref),
+                                        builder.makeDrop(curr->rtt),
+                                        builder.makeConst(int32_t(0))}));
+    }
+  }
+
   void visitRefIs(RefIs* curr) {
     if (curr->type == Type::unreachable) {
       return;

@@ -4,7 +4,6 @@
 (module
  ;; CHECK:      (type $none_=>_none (func))
  (type $none_=>_none (func))
- ;; CHECK:      (type $none_=>_i32 (func (result i32)))
  (type $none_=>_i32 (func (result i32)))
  ;; CHECK:      (func $0
  ;; CHECK-NEXT:  (nop)
@@ -15,10 +14,7 @@
  ;; CHECK:      (func $1
  ;; CHECK-NEXT:  (drop
  ;; CHECK-NEXT:   (call_ref
- ;; CHECK-NEXT:    (ref.cast
- ;; CHECK-NEXT:     (ref.func $0)
- ;; CHECK-NEXT:     (rtt.canon $none_=>_i32)
- ;; CHECK-NEXT:    )
+ ;; CHECK-NEXT:    (unreachable)
  ;; CHECK-NEXT:   )
  ;; CHECK-NEXT:  )
  ;; CHECK-NEXT: )
@@ -28,7 +24,9 @@
   ;; where it inlines, for efficiency). As part of the optimiziations, we will
   ;; try to precompute the cast here, which will try to look up $0. We should
   ;; not hit an assertion, rather we should skip precomputing it, the same as if
-  ;; we were optimizing $1 before $0 were added to the module.
+  ;; we were optimizing $1 before $0 were added to the module. (In fact, we will
+  ;; be able to see that the cast cannot succeed, and will optimize it into an
+  ;; unreachable.)
   (call $0)
   (drop
    (call_ref

@@ -143,18 +143,24 @@
 
  ;; CHECK:      (func $fallthrough-bad-type (result i32)
  ;; CHECK-NEXT:  (call_ref
- ;; CHECK-NEXT:   (ref.cast
- ;; CHECK-NEXT:    (ref.func $return-nothing)
- ;; CHECK-NEXT:    (rtt.canon $none_=>_i32)
+ ;; CHECK-NEXT:   (block
+ ;; CHECK-NEXT:    (drop
+ ;; CHECK-NEXT:     (ref.func $return-nothing)
+ ;; CHECK-NEXT:    )
+ ;; CHECK-NEXT:    (drop
+ ;; CHECK-NEXT:     (rtt.canon $none_=>_i32)
+ ;; CHECK-NEXT:    )
+ ;; CHECK-NEXT:    (unreachable)
  ;; CHECK-NEXT:   )
  ;; CHECK-NEXT:  )
  ;; CHECK-NEXT: )
  (func $fallthrough-bad-type (result i32)
   ;; A fallthrough appears here, and we cast the function type to something else
   ;; in a way that is bad: the actual target function has a different return
-  ;; type than the cast type. The cast will fail at runtime, and we should not
+  ;; type than the cast type. The cast will definitely fail, and we should not
   ;; emit non-validating code here, which would happen if we replace the
-  ;; call_ref that returns nothing with a call that returns an i32.
+  ;; call_ref that returns nothing with a call that returns an i32. In fact, we
+  ;; end up optimizing the cast into an unreachable.
   (call_ref
    (ref.cast
     (ref.func $return-nothing)