-
Notifications
You must be signed in to change notification settings - Fork 786
Precompute into select arms #6212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 62 commits
8b5f4d4
859751f
6f4449f
e9c87b7
efde43a
5106fe8
e12d243
55f9c7f
63a843f
959ad86
c69aeb0
b0e861a
c369c8e
35f366e
d88db54
025532a
a61980a
7efd928
8c63b82
6a04aed
9f4a9b7
7654883
2b28e97
3529d6b
a3379ab
4e6b174
0ff3f96
c84e0d6
dde94ae
e825d06
df8d86f
01a1fad
a37cc0c
35984d7
2731967
5b782a5
634db03
135b0ff
a9f0abc
5c2fe38
7460234
fda1434
ae0e419
26f64c3
d56d28b
0cf5b7e
89e50ad
1689d4b
56f49a9
ae59dd9
0792c10
8bd5652
c8f6e0e
b48cefc
65f55b9
51cb246
c1f04e9
d813d65
a7953b9
66c43c7
ceec9d1
e6467ba
5afd4db
c561a39
6ecc7b0
6f4d81e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,16 +27,19 @@ | |
// looked at. | ||
// | ||
|
||
#include <ir/literal-utils.h> | ||
#include <ir/local-graph.h> | ||
#include <ir/manipulation.h> | ||
#include <ir/properties.h> | ||
#include <ir/utils.h> | ||
#include <pass.h> | ||
#include <support/unique_deferring_queue.h> | ||
#include <wasm-builder.h> | ||
#include <wasm-interpreter.h> | ||
#include <wasm.h> | ||
#include "ir/effects.h" | ||
#include "ir/iteration.h" | ||
#include "ir/literal-utils.h" | ||
#include "ir/local-graph.h" | ||
#include "ir/manipulation.h" | ||
#include "ir/properties.h" | ||
#include "ir/utils.h" | ||
#include "pass.h" | ||
#include "support/insert_ordered.h" | ||
#include "support/unique_deferring_queue.h" | ||
#include "wasm-builder.h" | ||
#include "wasm-interpreter.h" | ||
#include "wasm.h" | ||
|
||
namespace wasm { | ||
|
||
|
@@ -210,9 +213,16 @@ struct Precompute | |
GetValues getValues; | ||
HeapValues heapValues; | ||
|
||
bool canPartiallyPrecompute; | ||
|
||
void doWalkFunction(Function* func) { | ||
// Perform partial precomputing only when the optimization level is non- | ||
// trivial, as it is slower and less likely to help. | ||
canPartiallyPrecompute = getPassOptions().optimizeLevel >= 2; | ||
|
||
// Walk the function and precompute things. | ||
super::doWalkFunction(func); | ||
partiallyPrecompute(func); | ||
if (!propagate) { | ||
return; | ||
} | ||
|
@@ -226,11 +236,13 @@ struct Precompute | |
// another walk to apply them and perhaps other optimizations that are | ||
// unlocked. | ||
super::doWalkFunction(func); | ||
// We could also try to partially precompute again, but that is a somewhat | ||
// heavy operation, so we only do it the first time, and leave such things | ||
// for later runs of this pass and for --converge. | ||
} | ||
// Note that in principle even more cycles could find further work here, in | ||
// very rare cases. To avoid constructing a LocalGraph again just for that | ||
// unlikely chance, we leave such things for later runs of this pass and for | ||
// --converge. | ||
// unlikely chance, we leave such things for later. | ||
} | ||
|
||
template<typename T> void reuseConstantNode(T* curr, Flow flow) { | ||
|
@@ -281,6 +293,9 @@ struct Precompute | |
} | ||
if (flow.breaking()) { | ||
if (flow.breakTo == NONCONSTANT_FLOW) { | ||
// This cannot be turned into a constant, but perhaps we can partially | ||
// precompute it. | ||
considerPartiallyPrecomputing(curr); | ||
return; | ||
} | ||
if (flow.breakTo == RETURN_FLOW) { | ||
|
@@ -319,6 +334,273 @@ struct Precompute | |
} | ||
} | ||
|
||
// If we failed to precompute a constant, perhaps we can still precompute part | ||
// of an expression. Specifically, consider this case: | ||
// | ||
// (A | ||
// (select | ||
// (B) | ||
// (C) | ||
// (condition) | ||
// ) | ||
// ) | ||
// | ||
// Perhaps we can compute A(B) and A(C). If so, we can emit a better select: | ||
// | ||
// (select | ||
// (constant result of A(B)) | ||
// (constant result of A(C)) | ||
// (condition) | ||
// ) | ||
// | ||
// Note that in general for code size we want to move operations *out* of | ||
// selects and ifs (OptimizeInstructions does that), but here we are | ||
// computing two constants which replace three expressions, so it is | ||
// worthwhile. | ||
// | ||
// To do such partial precomputing, in the main pass we note selects that look | ||
// promising. If we find any then we do a second pass later just for that (as | ||
// doing so requires walking up the stack in a manner that we want to avoid in | ||
// the main pass for overhead reasons; see below). | ||
// | ||
// Note that selects are all we really need here: Other passes would turn an | ||
// if into a select if the arms are simple enough, and only in those cases | ||
// (simple arms) do we have a chance at partially precomputing. For example, | ||
// if an arm is a constant then we can, but if it is a call then we can't.) | ||
// However, there are cases like an if with arms with side effects that end in | ||
// precomputable things, that are missed atm TODO | ||
std::unordered_set<Select*> partiallyPrecomputable; | ||
|
||
void considerPartiallyPrecomputing(Expression* curr) { | ||
if (!canPartiallyPrecompute) { | ||
return; | ||
} | ||
|
||
if (auto* select = curr->dynCast<Select>()) { | ||
// We only have a reasonable hope of success if the select arms are things | ||
// like constants or global gets. At a first approximation, allow the set | ||
// of things we allow in constant initializers (but we can probably allow | ||
// more here TODO). | ||
// | ||
// We also ignore selects with no parent (that are the entire function | ||
// body) as then there is nothing to optimize into their arms. | ||
auto& wasm = *getModule(); | ||
if (Properties::isValidConstantExpression(wasm, select->ifTrue) && | ||
Properties::isValidConstantExpression(wasm, select->ifFalse) && | ||
getFunction()->body != select) { | ||
partiallyPrecomputable.insert(select); | ||
} | ||
} | ||
} | ||
|
||
// To partially precompute selects we walk up the stack from them, like this: | ||
// | ||
// (A | ||
// (B | ||
// (select | ||
// (C) | ||
// (D) | ||
// (condition) | ||
// ) | ||
// ) | ||
// ) | ||
// | ||
// First we try to apply B to C and D. If that works, we arrive at this: | ||
// | ||
// (A | ||
// (select | ||
// (constant result of B(C)) | ||
// (constant result of B(D)) | ||
// (condition) | ||
// ) | ||
// ) | ||
// | ||
// We can then proceed to perhaps apply A. However, even if we failed to apply | ||
// B then we can try to apply A and B together, because that combination may | ||
// succeed where incremental work fails, for example: | ||
// | ||
// (global $C | ||
// (struct.new ;; outer | ||
// (struct.new ;; inner | ||
// (i32.const 10) | ||
// ) | ||
// ) | ||
// ) | ||
// | ||
// (struct.get ;; outer | ||
// (struct.get ;; inner | ||
// (select | ||
// (global.get $C) | ||
// (global.get $D) | ||
// (condition) | ||
// ) | ||
// ) | ||
// ) | ||
// | ||
// Applying the inner struct.get to $C leads us to the inner struct.new, but | ||
// that is an interior pointer in the global - it is not something we can | ||
// refer to using a global.get, so precomputing it fails. However, when we | ||
// apply both struct.gets at once we arrive at the outer struct.new, which is | ||
// in fact the global $C, and we succeed. | ||
void partiallyPrecompute(Function* func) { | ||
if (!canPartiallyPrecompute || partiallyPrecomputable.empty()) { | ||
// Nothing to do. | ||
return; | ||
} | ||
|
||
// Walk the function to find the parent stacks of the promising selects. We | ||
// copy the stacks and process them later. We do it like this because if we | ||
// wanted to process stacks as we reached them then we'd trip over | ||
// ourselves: when we optimize we replace a parent, but that parent is an | ||
// expression we'll reach later in the walk, so modifying it is unsafe. | ||
struct StackFinder : public ExpressionStackWalker<StackFinder> { | ||
Precompute& parent; | ||
|
||
StackFinder(Precompute& parent) : parent(parent) {} | ||
|
||
// We will later iterate on this in the order of insertion, which keeps | ||
// things deterministic, and also usually lets us do consecutive work | ||
// like a select nested in another select's condition, simply because we | ||
// will traverse the selects in postorder (however, because we cannot | ||
// always succeed in an incremental manner - see the comment on this | ||
// function - it is possible in theory that some work can happen only in a | ||
// later execution of the pass). | ||
InsertOrderedMap<Select*, ExpressionStack> stackMap; | ||
|
||
void visitSelect(Select* curr) { | ||
if (parent.partiallyPrecomputable.count(curr)) { | ||
stackMap[curr] = expressionStack; | ||
} | ||
} | ||
} stackFinder(*this); | ||
stackFinder.walkFunction(func); | ||
|
||
// Note which expressions we've modified as we go, as it is invalid to | ||
// modify more than once. This could happen in theory in a situation like | ||
// this: | ||
// | ||
// (ternary.f32.max ;; fictional instruction for explanatory purposes | ||
// (select ..) | ||
// (select ..) | ||
// (f32.infinity) | ||
// ) | ||
// | ||
// When we consider the first select we can see that the computation result | ||
// is always infinity, so we can optimize here and replace the ternary. Then | ||
// the same thing happens with the second select, causing the ternary to be | ||
// replaced again, which is unsafe. (Note that in this example the result | ||
kripken marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// is the same either way, but at least in theory an instruction could exist | ||
// for whom there was a difference.) In practice it does not seem that wasm | ||
// has instructions capable of this atm but this code is still useful to | ||
// guard against future problems, and as a minor speedup (quickly skip code | ||
// if it was already modified). | ||
std::unordered_set<Expression*> modified; | ||
|
||
for (auto& [select, stack] : stackFinder.stackMap) { | ||
// Each stack ends in the select itself, and contains more than the select | ||
// itself (otherwise we'd have ignored the select), i.e., the select has a | ||
// parent that we can try to optimize into the arms. | ||
assert(stack.back() == select); | ||
assert(stack.size() >= 2); | ||
Index selectIndex = stack.size() - 1; | ||
assert(selectIndex >= 1); | ||
|
||
if (modified.count(select)) { | ||
// This select was modified; go to the next one. | ||
continue; | ||
} | ||
|
||
// Go up through the parents, until we can't do any more work. At each | ||
// parent we'll try to execute it and all intermediate parents into the | ||
// select arms. | ||
for (Index parentIndex = selectIndex - 1; | ||
parentIndex != Index(-1); | ||
parentIndex--) { | ||
auto* parent = stack[parentIndex]; | ||
if (modified.count(parent)) { | ||
// This parent was modified; exit the loop on parents as no upper | ||
// parent is valid to try either. | ||
break; | ||
} | ||
|
||
// If the parent lacks a concrete type then we can't move it into the | ||
// select: the select needs a concrete (and non-tuple) type. For example | ||
// if the parent is a drop or is unreachable, those are things we don't | ||
// want to handle, and we stop here (once we see one such parent we | ||
// can't expect to make any more progress). | ||
if (!parent->type.isConcrete() || parent->type.isTuple()) { | ||
break; | ||
} | ||
|
||
// We are precomputing the select arms, but leaving the condition as-is. | ||
// If the condition breaks to the parent, then we can't move the parent | ||
// into the select arms: | ||
// | ||
// (block $name ;; this must stay outside of the select | ||
// (select | ||
// (B) | ||
// (C) | ||
// (block ;; condition | ||
// (br_if $target | ||
// | ||
// Ignore all control flow for simplicity, as they aren't interesting | ||
// for us, and other passes should have removed them anyhow. | ||
if (Properties::isControlFlowStructure(parent)) { | ||
break; | ||
} | ||
|
||
// This looks promising, so try to precompute here. What we do is | ||
// precompute twice, once with the select replaced with the left arm, | ||
// and once with the right. If both succeed then we can create a new | ||
// select (with the same condition as before) whose arms are the | ||
// precomputed values. | ||
auto isValidPrecomputation = [&](const Flow& flow) { | ||
// For now we handle simple concrete values. We could also handle | ||
// breaks in principle TODO | ||
return canEmitConstantFor(flow.values) && !flow.breaking() && | ||
flow.values.isConcrete(); | ||
}; | ||
|
||
// Find the pointer to the select in its immediate parent so that we can | ||
// replace it first with one arm and then the other. | ||
auto** pointerToSelect = | ||
getChildPointerInImmediateParent(stack, selectIndex, func); | ||
*pointerToSelect = select->ifTrue; | ||
auto ifTrue = precomputeExpression(parent); | ||
if (isValidPrecomputation(ifTrue)) { | ||
*pointerToSelect = select->ifFalse; | ||
auto ifFalse = precomputeExpression(parent); | ||
if (isValidPrecomputation(ifFalse)) { | ||
// Wonderful, we can precompute here! The select can now contain the | ||
// computed values in its arms. | ||
select->ifTrue = ifTrue.getConstExpression(*getModule()); | ||
Comment on lines
+573
to
+576
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we essentially have to do twice as much work because we separately evaluate There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure I see what you mean. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I misunderstood what was happening. Disregard! |
||
select->ifFalse = ifFalse.getConstExpression(*getModule()); | ||
select->finalize(); | ||
|
||
// The parent of the select is now replaced by the select. | ||
auto** pointerToParent = | ||
getChildPointerInImmediateParent(stack, parentIndex, func); | ||
*pointerToParent = select; | ||
|
||
// Update state for further iterations: Mark everything modified and | ||
// move the select to the parent's location. | ||
for (Index i = parentIndex; i <= selectIndex; i++) { | ||
modified.insert(stack[i]); | ||
} | ||
selectIndex = parentIndex; | ||
stack[selectIndex] = select; | ||
stack.resize(selectIndex + 1); | ||
} | ||
} | ||
|
||
// Whether we succeeded to precompute here or not, restore the parent's | ||
// pointer to its original state (if we precomputed, the parent is no | ||
// longer in use, but there is no harm in modifying it). | ||
*pointerToSelect = select; | ||
} | ||
} | ||
} | ||
|
||
void visitFunction(Function* curr) { | ||
// removing breaks can alter types | ||
ReFinalize().walkFunctionInModule(curr, getModule()); | ||
|
@@ -531,6 +813,30 @@ struct Precompute | |
|
||
return true; | ||
} | ||
|
||
// Helpers for partial precomputing. | ||
|
||
// Given a stack of expressions and the index of an expression in it, find | ||
// the pointer to that expression in the parent. This gives us a pointer that | ||
// allows us to replace the expression. | ||
Expression** getChildPointerInImmediateParent(const ExpressionStack& stack, | ||
Index index, | ||
Function* func) { | ||
if (index == 0) { | ||
// There is nothing above this expression, so the pointer referring to it | ||
// is the function's body. | ||
return &func->body; | ||
} | ||
|
||
auto* child = stack[index]; | ||
for (auto** currChild : ChildIterator(stack[index - 1]).children) { | ||
if (*currChild == child) { | ||
return currChild; | ||
} | ||
} | ||
|
||
WASM_UNREACHABLE("child not found in parent"); | ||
} | ||
}; | ||
|
||
Pass* createPrecomputePass() { return new Precompute(false); } | ||
|
Uh oh!
There was an error while loading. Please reload this page.