LocalCSE: Fix regression from #6587 by accumulating generativity (#6591)

kripken · web-flow · commit 8a5dc1880d96 · 2024-05-15T09:42:28.000-07:00
#6587 was incorrect: It checked generativity early in an incremental manner, but it did not accumulate that information as we do with hashes. As a result we could end up optimizing something with a generative child, and sadly we lacked testing for that case. This adds incremental generativity computation alongside hashes. It also splits out this check from isRelevant. Also add a test for nested effects (as opposed to generativity), but that already worked before this PR (as we compute effects and invalidation as we go, already).
diff --git a/src/passes/LocalCSE.cpp b/src/passes/LocalCSE.cpp
@@ -217,16 +217,18 @@ struct Scanner
   // original expression that we request from.
   HashedExprs activeExprs;
 
-  // Stack of hash values of all active expressions. We store these so that we
-  // do not end up recomputing hashes of children in an N^2 manner.
-  SmallVector<size_t, 10> activeHashes;
+  // Stack of information of all active expressions. We store hash values and
+  // possibility (as computed by isPossible), which we compute incrementally so
+  // as to avoid N^2 work (which could happen if we recomputed children).
+  using HashPossibility = std::pair<size_t, bool>;
+  SmallVector<HashPossibility, 10> activeIncrementalInfo;
 
   static void doNoteNonLinear(Scanner* self, Expression** currp) {
     // We are starting a new basic block. Forget all the currently-hashed
     // expressions, as we no longer want to make connections to anything from
     // another block.
     self->activeExprs.clear();
-    self->activeHashes.clear();
+    self->activeIncrementalInfo.clear();
     // Note that we do not clear requestInfos - that is information we will use
     // later in the Applier class. That is, we've cleared all the active
     // information, leaving the things we need later.
@@ -245,19 +247,24 @@ struct Scanner
     // that are not isRelevant() (if they are the children of a relevant thing).
     auto numChildren = Properties::getNumChildren(curr);
     auto hash = ExpressionAnalyzer::shallowHash(curr);
+    auto possible = isPossible(curr);
     for (Index i = 0; i < numChildren; i++) {
-      if (activeHashes.empty()) {
+      if (activeIncrementalInfo.empty()) {
         // The child was in another block, so this expression cannot be
         // optimized.
         return;
       }
-      hash_combine(hash, activeHashes.back());
-      activeHashes.pop_back();
+      auto [currHash, currPossible] = activeIncrementalInfo.back();
+      activeIncrementalInfo.pop_back();
+      hash_combine(hash, currHash);
+      if (!currPossible) {
+        possible = false;
+      }
     }
-    activeHashes.push_back(hash);
+    activeIncrementalInfo.emplace_back(hash, possible);
 
-    // Check if this is something relevant for optimization.
-    if (!isRelevant(curr)) {
+    // Check if this is something possible and also relevant for optimization.
+    if (!possible || !isRelevant(curr)) {
       return;
     }
 
@@ -330,6 +337,32 @@ struct Scanner
       return false;
     }
 
+    // If the size is at least 3, then if we have two of them we have 6,
+    // and so adding one set+one get and removing one of the items itself
+    // is not detrimental, and may be beneficial.
+    // TODO: investigate size 2
+    auto size = Measurer::measure(curr);
+    if (options.shrinkLevel > 0 && size >= 3) {
+      return true;
+    }
+
+    // If we focus on speed, any reduction in cost is beneficial, as the
+    // cost of a get is essentially free. However, we need to balance that with
+    // the fact that the VM will also do CSE/GVN itself, so minor improvements
+    // are not worthwhile, so skip things of size 1 (like a global.get).
+    if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) {
+      return true;
+    }
+
+    return false;
+  }
+
+  // Some things are not possible, and also prevent their parents from being
+  // possible as well. This is different from isRelevant in that relevance is
+  // considered for the entire expression, including children - e.g., is the
+  // total size big enough - while isPossible checks conditions that prevent
+  // using an expression at all.
+  bool isPossible(Expression* curr) {
     // We will fully compute effects later, but consider shallow effects at this
     // early time to ignore things that cannot be optimized later, because we
     // use a greedy algorithm. Specifically, imagine we see this:
@@ -364,24 +397,7 @@ struct Scanner
       return false;
     }
 
-    // If the size is at least 3, then if we have two of them we have 6,
-    // and so adding one set+one get and removing one of the items itself
-    // is not detrimental, and may be beneficial.
-    // TODO: investigate size 2
-    auto size = Measurer::measure(curr);
-    if (options.shrinkLevel > 0 && size >= 3) {
-      return true;
-    }
-
-    // If we focus on speed, any reduction in cost is beneficial, as the
-    // cost of a get is essentially free. However, we need to balance that with
-    // the fact that the VM will also do CSE/GVN itself, so minor improvements
-    // are not worthwhile, so skip things of size 1 (like a global.get).
-    if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) {
-      return true;
-    }
-
-    return false;
+    return true;
   }
 };
 
diff --git a/test/lit/passes/local-cse.wast b/test/lit/passes/local-cse.wast
@@ -11,7 +11,9 @@
 
   ;; CHECK:      (type $2 (func (param i32)))
 
-  ;; CHECK:      (type $3 (func (result i64)))
+  ;; CHECK:      (type $3 (func (result i32)))
+
+  ;; CHECK:      (type $4 (func (result i64)))
 
   ;; CHECK:      (memory $0 100 100)
 
@@ -356,6 +358,38 @@
     )
   )
 
+  ;; CHECK:      (func $nested-calls (result i32)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.add
+  ;; CHECK-NEXT:    (call $nested-calls)
+  ;; CHECK-NEXT:    (call $nested-calls)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.add
+  ;; CHECK-NEXT:    (call $nested-calls)
+  ;; CHECK-NEXT:    (call $nested-calls)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (unreachable)
+  ;; CHECK-NEXT: )
+  (func $nested-calls (result i32)
+    ;; Operations that include nested effects are ignored.
+    (drop
+      (i32.add
+        (call $nested-calls)
+        (call $nested-calls)
+      )
+    )
+    (drop
+      (i32.add
+        (call $nested-calls)
+        (call $nested-calls)
+      )
+    )
+    (unreachable)
+  )
+
   ;; CHECK:      (func $many-sets (result i64)
   ;; CHECK-NEXT:  (local $temp i64)
   ;; CHECK-NEXT:  (local $1 i64)
diff --git a/test/lit/passes/local-cse_all-features.wast b/test/lit/passes/local-cse_all-features.wast
@@ -67,13 +67,13 @@
 
   ;; CHECK:      (type $2 (func (param (ref $A))))
 
-  ;; CHECK:      (type $3 (func (param (ref null $A))))
+  ;; CHECK:      (type $3 (func))
 
-  ;; CHECK:      (type $4 (func))
+  ;; CHECK:      (type $4 (func (param (ref null $A))))
 
   ;; CHECK:      (type $5 (func (param (ref null $B) (ref $A))))
 
-  ;; CHECK:      (func $struct-gets-nullable (type $3) (param $ref (ref null $A))
+  ;; CHECK:      (func $struct-gets-nullable (type $4) (param $ref (ref null $A))
   ;; CHECK-NEXT:  (local $1 i32)
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (local.tee $1
@@ -182,7 +182,7 @@
     )
   )
 
-  ;; CHECK:      (func $creations (type $4)
+  ;; CHECK:      (func $creations (type $3)
   ;; CHECK-NEXT:  (drop
   ;; CHECK-NEXT:   (struct.new $A
   ;; CHECK-NEXT:    (i32.const 1)
@@ -233,6 +233,36 @@
     )
   )
 
+  ;; CHECK:      (func $nested-generativity (type $3)
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.eq
+  ;; CHECK-NEXT:    (struct.new_default $A)
+  ;; CHECK-NEXT:    (struct.new_default $A)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (ref.eq
+  ;; CHECK-NEXT:    (struct.new_default $A)
+  ;; CHECK-NEXT:    (struct.new_default $A)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $nested-generativity
+    ;; Operations that include nested generativity are ignored.
+    (drop
+      (ref.eq
+        (struct.new_default $A)
+        (struct.new_default $A)
+      )
+    )
+    (drop
+      (ref.eq
+        (struct.new_default $A)
+        (struct.new_default $A)
+      )
+    )
+  )
+
   ;; CHECK:      (func $structs-and-arrays-do-not-alias (type $5) (param $array (ref null $B)) (param $struct (ref $A))
   ;; CHECK-NEXT:  (local $2 i32)
   ;; CHECK-NEXT:  (array.set $B