@@ -217,16 +217,18 @@ struct Scanner
217217 // original expression that we request from.
218218 HashedExprs activeExprs;
219219
220- // Stack of hash values of all active expressions. We store these so that we
221- // do not end up recomputing hashes of children in an N^2 manner.
222- SmallVector<size_t , 10 > activeHashes;
220+ // Stack of information about all active expressions. For each one we store
221+ // its hash value and its possibility (as computed by isPossible). Both are
222+ // computed incrementally to avoid N^2 work (from recomputing children).
223+ using HashPossibility = std::pair<size_t , bool >;
224+ SmallVector<HashPossibility, 10 > activeIncrementalInfo;
223225
224226 static void doNoteNonLinear (Scanner* self, Expression** currp) {
225227 // We are starting a new basic block. Forget all the currently-hashed
226228 // expressions, as we no longer want to make connections to anything from
227229 // another block.
228230 self->activeExprs .clear ();
229- self->activeHashes .clear ();
231+ self->activeIncrementalInfo .clear ();
230232 // Note that we do not clear requestInfos - that is information we will use
231233 // later in the Applier class. That is, we've cleared all the active
232234 // information, leaving the things we need later.
@@ -245,19 +247,24 @@ struct Scanner
245247 // that are not isRelevant() (if they are the children of a relevant thing).
246248 auto numChildren = Properties::getNumChildren (curr);
247249 auto hash = ExpressionAnalyzer::shallowHash (curr);
250+ auto possible = isPossible (curr);
248251 for (Index i = 0 ; i < numChildren; i++) {
249- if (activeHashes .empty ()) {
252+ if (activeIncrementalInfo .empty ()) {
250253 // The child was in another block, so this expression cannot be
251254 // optimized.
252255 return ;
253256 }
254- hash_combine (hash, activeHashes.back ());
255- activeHashes.pop_back ();
257+ auto [currHash, currPossible] = activeIncrementalInfo.back ();
258+ activeIncrementalInfo.pop_back ();
259+ hash_combine (hash, currHash);
260+ if (!currPossible) {
261+ possible = false ;
262+ }
256263 }
257- activeHashes. push_back (hash);
264+ activeIncrementalInfo. emplace_back (hash, possible );
258265
259- // Check if this is something relevant for optimization.
260- if (!isRelevant (curr)) {
266+ // Check if this is something possible and also relevant for optimization.
267+ if (!possible || ! isRelevant (curr)) {
261268 return ;
262269 }
263270
@@ -330,6 +337,32 @@ struct Scanner
330337 return false ;
331338 }
332339
340+ // If the size is at least 3, then if we have two of them we have 6,
341+ // and so adding one set+one get and removing one of the items itself
342+ // is not detrimental, and may be beneficial.
343+ // TODO: investigate size 2
344+ auto size = Measurer::measure (curr);
345+ if (options.shrinkLevel > 0 && size >= 3 ) {
346+ return true ;
347+ }
348+
349+ // If we focus on speed, any reduction in cost is beneficial, as the
350+ // cost of a get is essentially free. However, we need to balance that with
351+ // the fact that the VM will also do CSE/GVN itself, so minor improvements
352+ // are not worthwhile, so skip things of size 1 (like a global.get).
353+ if (options.shrinkLevel == 0 && CostAnalyzer (curr).cost > 0 && size >= 2 ) {
354+ return true ;
355+ }
356+
357+ return false ;
358+ }
359+
360+ // Some things are not possible, and also prevent their parents from being
361+ // possible as well. This is different from isRelevant in that relevance is
362+ // considered for the entire expression, including children - e.g., is the
363+ // total size big enough - while isPossible checks conditions that prevent
364+ // using an expression at all.
365+ bool isPossible (Expression* curr) {
333366 // We will fully compute effects later, but consider shallow effects at this
334367 // early time to ignore things that cannot be optimized later, because we
335368 // use a greedy algorithm. Specifically, imagine we see this:
@@ -364,24 +397,7 @@ struct Scanner
364397 return false ;
365398 }
366399
367- // If the size is at least 3, then if we have two of them we have 6,
368- // and so adding one set+one get and removing one of the items itself
369- // is not detrimental, and may be beneficial.
370- // TODO: investigate size 2
371- auto size = Measurer::measure (curr);
372- if (options.shrinkLevel > 0 && size >= 3 ) {
373- return true ;
374- }
375-
376- // If we focus on speed, any reduction in cost is beneficial, as the
377- // cost of a get is essentially free. However, we need to balance that with
378- // the fact that the VM will also do CSE/GVN itself, so minor improvements
379- // are not worthwhile, so skip things of size 1 (like a global.get).
380- if (options.shrinkLevel == 0 && CostAnalyzer (curr).cost > 0 && size >= 2 ) {
381- return true ;
382- }
383-
384- return false ;
400+ return true ;
385401 }
386402};
387403
0 commit comments