Skip to content

Commit 8a5dc18

Browse files
authored
LocalCSE: Fix regression from #6587 by accumulating generativity (#6591)
#6587 was incorrect: It checked generativity early in an incremental manner, but it did not accumulate that information as we do with hashes. As a result we could end up optimizing something with a generative child, and sadly we lacked testing for that case. This adds incremental generativity computation alongside hashes. It also splits out this check from isRelevant. Also add a test for nested effects (as opposed to generativity), but that already worked before this PR (as we compute effects and invalidation as we go, already).
1 parent 940f457 commit 8a5dc18

File tree

3 files changed

+113
-33
lines changed

3 files changed

+113
-33
lines changed

src/passes/LocalCSE.cpp

Lines changed: 44 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -217,16 +217,18 @@ struct Scanner
217217
// original expression that we request from.
218218
HashedExprs activeExprs;
219219

220-
// Stack of hash values of all active expressions. We store these so that we
221-
// do not end up recomputing hashes of children in an N^2 manner.
222-
SmallVector<size_t, 10> activeHashes;
220+
// Stack of information of all active expressions. We store hash values and
221+
// possibility (as computed by isPossible), which we compute incrementally so
222+
// as to avoid N^2 work (which could happen if we recomputed children).
223+
using HashPossibility = std::pair<size_t, bool>;
224+
SmallVector<HashPossibility, 10> activeIncrementalInfo;
223225

224226
static void doNoteNonLinear(Scanner* self, Expression** currp) {
225227
// We are starting a new basic block. Forget all the currently-hashed
226228
// expressions, as we no longer want to make connections to anything from
227229
// another block.
228230
self->activeExprs.clear();
229-
self->activeHashes.clear();
231+
self->activeIncrementalInfo.clear();
230232
// Note that we do not clear requestInfos - that is information we will use
231233
// later in the Applier class. That is, we've cleared all the active
232234
// information, leaving the things we need later.
@@ -245,19 +247,24 @@ struct Scanner
245247
// that are not isRelevant() (if they are the children of a relevant thing).
246248
auto numChildren = Properties::getNumChildren(curr);
247249
auto hash = ExpressionAnalyzer::shallowHash(curr);
250+
auto possible = isPossible(curr);
248251
for (Index i = 0; i < numChildren; i++) {
249-
if (activeHashes.empty()) {
252+
if (activeIncrementalInfo.empty()) {
250253
// The child was in another block, so this expression cannot be
251254
// optimized.
252255
return;
253256
}
254-
hash_combine(hash, activeHashes.back());
255-
activeHashes.pop_back();
257+
auto [currHash, currPossible] = activeIncrementalInfo.back();
258+
activeIncrementalInfo.pop_back();
259+
hash_combine(hash, currHash);
260+
if (!currPossible) {
261+
possible = false;
262+
}
256263
}
257-
activeHashes.push_back(hash);
264+
activeIncrementalInfo.emplace_back(hash, possible);
258265

259-
// Check if this is something relevant for optimization.
260-
if (!isRelevant(curr)) {
266+
// Check if this is something possible and also relevant for optimization.
267+
if (!possible || !isRelevant(curr)) {
261268
return;
262269
}
263270

@@ -330,6 +337,32 @@ struct Scanner
330337
return false;
331338
}
332339

340+
// If the size is at least 3, then if we have two of them we have 6,
341+
// and so adding one set+one get and removing one of the items itself
342+
// is not detrimental, and may be beneficial.
343+
// TODO: investigate size 2
344+
auto size = Measurer::measure(curr);
345+
if (options.shrinkLevel > 0 && size >= 3) {
346+
return true;
347+
}
348+
349+
// If we focus on speed, any reduction in cost is beneficial, as the
350+
// cost of a get is essentially free. However, we need to balance that with
351+
// the fact that the VM will also do CSE/GVN itself, so minor improvements
352+
// are not worthwhile, so skip things of size 1 (like a global.get).
353+
if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) {
354+
return true;
355+
}
356+
357+
return false;
358+
}
359+
360+
// Some things are not possible, and also prevent their parents from being
361+
// possible as well. This is different from isRelevant in that relevance is
362+
// considered for the entire expression, including children - e.g., is the
363+
// total size big enough - while isPossible checks conditions that prevent
364+
// using an expression at all.
365+
bool isPossible(Expression* curr) {
333366
// We will fully compute effects later, but consider shallow effects at this
334367
// early time to ignore things that cannot be optimized later, because we
335368
// use a greedy algorithm. Specifically, imagine we see this:
@@ -364,24 +397,7 @@ struct Scanner
364397
return false;
365398
}
366399

367-
// If the size is at least 3, then if we have two of them we have 6,
368-
// and so adding one set+one get and removing one of the items itself
369-
// is not detrimental, and may be beneficial.
370-
// TODO: investigate size 2
371-
auto size = Measurer::measure(curr);
372-
if (options.shrinkLevel > 0 && size >= 3) {
373-
return true;
374-
}
375-
376-
// If we focus on speed, any reduction in cost is beneficial, as the
377-
// cost of a get is essentially free. However, we need to balance that with
378-
// the fact that the VM will also do CSE/GVN itself, so minor improvements
379-
// are not worthwhile, so skip things of size 1 (like a global.get).
380-
if (options.shrinkLevel == 0 && CostAnalyzer(curr).cost > 0 && size >= 2) {
381-
return true;
382-
}
383-
384-
return false;
400+
return true;
385401
}
386402
};
387403

test/lit/passes/local-cse.wast

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
;; CHECK: (type $2 (func (param i32)))
1313

14-
;; CHECK: (type $3 (func (result i64)))
14+
;; CHECK: (type $3 (func (result i32)))
15+
16+
;; CHECK: (type $4 (func (result i64)))
1517

1618
;; CHECK: (memory $0 100 100)
1719

@@ -356,6 +358,38 @@
356358
)
357359
)
358360

361+
;; CHECK: (func $nested-calls (result i32)
362+
;; CHECK-NEXT: (drop
363+
;; CHECK-NEXT: (i32.add
364+
;; CHECK-NEXT: (call $nested-calls)
365+
;; CHECK-NEXT: (call $nested-calls)
366+
;; CHECK-NEXT: )
367+
;; CHECK-NEXT: )
368+
;; CHECK-NEXT: (drop
369+
;; CHECK-NEXT: (i32.add
370+
;; CHECK-NEXT: (call $nested-calls)
371+
;; CHECK-NEXT: (call $nested-calls)
372+
;; CHECK-NEXT: )
373+
;; CHECK-NEXT: )
374+
;; CHECK-NEXT: (unreachable)
375+
;; CHECK-NEXT: )
376+
(func $nested-calls (result i32)
377+
;; Operations that include nested effects are ignored.
378+
(drop
379+
(i32.add
380+
(call $nested-calls)
381+
(call $nested-calls)
382+
)
383+
)
384+
(drop
385+
(i32.add
386+
(call $nested-calls)
387+
(call $nested-calls)
388+
)
389+
)
390+
(unreachable)
391+
)
392+
359393
;; CHECK: (func $many-sets (result i64)
360394
;; CHECK-NEXT: (local $temp i64)
361395
;; CHECK-NEXT: (local $1 i64)

test/lit/passes/local-cse_all-features.wast

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,13 @@
6767

6868
;; CHECK: (type $2 (func (param (ref $A))))
6969

70-
;; CHECK: (type $3 (func (param (ref null $A))))
70+
;; CHECK: (type $3 (func))
7171

72-
;; CHECK: (type $4 (func))
72+
;; CHECK: (type $4 (func (param (ref null $A))))
7373

7474
;; CHECK: (type $5 (func (param (ref null $B) (ref $A))))
7575

76-
;; CHECK: (func $struct-gets-nullable (type $3) (param $ref (ref null $A))
76+
;; CHECK: (func $struct-gets-nullable (type $4) (param $ref (ref null $A))
7777
;; CHECK-NEXT: (local $1 i32)
7878
;; CHECK-NEXT: (drop
7979
;; CHECK-NEXT: (local.tee $1
@@ -182,7 +182,7 @@
182182
)
183183
)
184184

185-
;; CHECK: (func $creations (type $4)
185+
;; CHECK: (func $creations (type $3)
186186
;; CHECK-NEXT: (drop
187187
;; CHECK-NEXT: (struct.new $A
188188
;; CHECK-NEXT: (i32.const 1)
@@ -233,6 +233,36 @@
233233
)
234234
)
235235

236+
;; CHECK: (func $nested-generativity (type $3)
237+
;; CHECK-NEXT: (drop
238+
;; CHECK-NEXT: (ref.eq
239+
;; CHECK-NEXT: (struct.new_default $A)
240+
;; CHECK-NEXT: (struct.new_default $A)
241+
;; CHECK-NEXT: )
242+
;; CHECK-NEXT: )
243+
;; CHECK-NEXT: (drop
244+
;; CHECK-NEXT: (ref.eq
245+
;; CHECK-NEXT: (struct.new_default $A)
246+
;; CHECK-NEXT: (struct.new_default $A)
247+
;; CHECK-NEXT: )
248+
;; CHECK-NEXT: )
249+
;; CHECK-NEXT: )
250+
(func $nested-generativity
251+
;; Operations that include nested generativity are ignored.
252+
(drop
253+
(ref.eq
254+
(struct.new_default $A)
255+
(struct.new_default $A)
256+
)
257+
)
258+
(drop
259+
(ref.eq
260+
(struct.new_default $A)
261+
(struct.new_default $A)
262+
)
263+
)
264+
)
265+
236266
;; CHECK: (func $structs-and-arrays-do-not-alias (type $5) (param $array (ref null $B)) (param $struct (ref $A))
237267
;; CHECK-NEXT: (local $2 i32)
238268
;; CHECK-NEXT: (array.set $B

0 commit comments

Comments
 (0)