-
Notifications
You must be signed in to change notification settings - Fork 849
OptimizeInstructions: Handle trivial ref.cast and ref.test #4097
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ca36e1f
8d6f325
12209ef
06e5cec
86cdfd1
567ba4d
6d9e883
80633ac
0a4bcdf
0629cd4
7b59bee
a933c91
1690f51
2a75422
32b0f42
740d448
360e79e
c0c11ee
7469918
1a583d5
43a4057
fd57dc4
4a38d3d
7ad15fd
f7c94dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| /* | ||
| * Copyright 2021 WebAssembly Community Group participants | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| #ifndef wasm_ir_reorderer_h | ||
| #define wasm_ir_reorderer_h | ||
|
|
||
| #include <ir/effects.h> | ||
| #include <wasm-builder.h> | ||
|
|
||
| namespace wasm { | ||
|
|
||
| // | ||
| // Given two expressions that appear in a specific order - first and then | ||
| // second - this helper can create a sequence in which we return the value of | ||
| // the first. If the two expressions can be reordered, this simply returns | ||
| // | ||
| // (second, first) | ||
| // | ||
| // If side effects prevent that, it will use a local to save the value of the | ||
| // first, and return it at the end, | ||
| // | ||
| // (temp = first, second, temp) | ||
| // | ||
| Expression* getResultOfFirst(Expression* first, | ||
| Expression* second, | ||
| Function* func, | ||
| Module* wasm, | ||
| const PassOptions& passOptions) { | ||
| assert(first->type.isConcrete()); | ||
|
|
||
| Builder builder(*wasm); | ||
|
|
||
| if (EffectAnalyzer::canReorder(passOptions, wasm->features, first, second)) { | ||
| return builder.makeSequence(second, first); | ||
| } | ||
|
|
||
| auto type = first->type; | ||
| auto index = Builder::addVar(func, type); | ||
| return builder.makeBlock({builder.makeLocalSet(index, first), | ||
| second, | ||
| builder.makeLocalGet(index, type)}); | ||
| } | ||
|
|
||
| } // namespace wasm | ||
|
|
||
| #endif // wasm_ir_reorderer_h |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,7 @@ | |
| #include <ir/load-utils.h> | ||
| #include <ir/manipulation.h> | ||
| #include <ir/match.h> | ||
| #include <ir/ordering.h> | ||
| #include <ir/properties.h> | ||
| #include <ir/type-updating.h> | ||
| #include <ir/utils.h> | ||
|
|
@@ -212,20 +213,35 @@ struct OptimizeInstructions | |
|
|
||
| bool fastMath; | ||
|
|
||
| bool refinalize = false; | ||
|
|
||
| void doWalkFunction(Function* func) { | ||
| fastMath = getPassOptions().fastMath; | ||
| // first, scan locals | ||
|
|
||
| // First, scan locals. | ||
| { | ||
| LocalScanner scanner(localInfo, getPassOptions()); | ||
| scanner.setModule(getModule()); | ||
| scanner.walkFunction(func); | ||
| } | ||
| // main walk | ||
|
|
||
| // Main walk. | ||
| super::doWalkFunction(func); | ||
|
|
||
| // If we need to update parent types, do so. | ||
| if (refinalize) { | ||
| ReFinalize().walkFunctionInModule(func, getModule()); | ||
| } | ||
|
|
||
| // Final optimizations. | ||
| { | ||
| FinalOptimizer optimizer(getPassOptions()); | ||
| optimizer.walkFunction(func); | ||
| } | ||
|
|
||
| // Some patterns create locals (like when we use getResultOfFirst), which we | ||
| // may need to fix up. | ||
| TypeUpdating::handleNonDefaultableLocals(func, *getModule()); | ||
| } | ||
|
|
||
| // Set to true when one of the visitors makes a change (either replacing the | ||
|
|
@@ -1263,41 +1279,82 @@ struct OptimizeInstructions | |
| skipNonNullCast(curr->srcRef); | ||
| } | ||
|
|
||
| bool canBeCastTo(HeapType a, HeapType b) { | ||
| return HeapType::isSubType(a, b) || HeapType::isSubType(b, a); | ||
| } | ||
|
|
||
| void visitRefCast(RefCast* curr) { | ||
| if (curr->type == Type::unreachable) { | ||
| return; | ||
| } | ||
|
|
||
| Builder builder(*getModule()); | ||
| auto passOptions = getPassOptions(); | ||
|
|
||
| auto fallthrough = Properties::getFallthrough( | ||
| curr->ref, getPassOptions(), getModule()->features); | ||
|
|
||
| // If the value is a null, it will just flow through, and we do not need the | ||
| // cast. However, if that would change the type, then things are less | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this expression is null and changes type from such as a struct to an array, would that possibly make the parent or grandparent expressions of this not validate? We fix nullability but what if the heaptype itself changes? Can
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code here is actually careful about the heap type, I'll improve the comment though. Basically, we use the heap type of the RTT to create the null - that is the type on the outside. |
||
| // simple: if the original type was non-nullable, replacing it with a null | ||
| // would change the type, which can happen in e.g. | ||
| // (ref.cast (ref.as_non_null (.. (ref.null) | ||
| if (fallthrough->is<RefNull>()) { | ||
| // Replace the expression with drops of the inputs, and a null. Note that | ||
| // we provide a null of the type the outside expects - that of the rtt, | ||
| // which is what was cast to. | ||
| Expression* rep = | ||
| builder.makeBlock({builder.makeDrop(curr->ref), | ||
| builder.makeDrop(curr->rtt), | ||
| builder.makeRefNull(curr->rtt->type.getHeapType())}); | ||
| if (curr->ref->type.isNonNullable()) { | ||
| // Avoid a type change by forcing to be non-nullable. In practice, this | ||
| // would have trapped before we get here, so this is just for | ||
|
Comment on lines
+1311
to
+1312
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why would this have trapped before we get here?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We receive a null value from a place that has a non-nullable type. Somehow the null falls through there, which I think is only possible using |
||
| // validation. | ||
| rep = builder.makeRefAs(RefAsNonNull, rep); | ||
| } | ||
| replaceCurrent(rep); | ||
| return; | ||
| // TODO: The optimal ordering of this and the other ref.as_non_null stuff | ||
| // later down in this functions is unclear and may be worth looking | ||
| // into. | ||
| } | ||
|
|
||
| // For the cast to be able to succeed, the value being cast must be a | ||
| // subtype of the desired type, as RTT subtyping is a subset of static | ||
| // subtyping. For example, trying to cast an array to a struct would be | ||
| // incompatible. | ||
| if (!canBeCastTo(curr->ref->type.getHeapType(), | ||
| curr->rtt->type.getHeapType())) { | ||
| // This cast cannot succeed. If the input is not a null, it will | ||
| // definitely trap. | ||
| if (fallthrough->type.isNonNullable()) { | ||
| // Our type will now be unreachable; update the parents. | ||
| refinalize = true; | ||
| replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref), | ||
| builder.makeDrop(curr->rtt), | ||
| builder.makeUnreachable()})); | ||
| return; | ||
| } | ||
| // Otherwise, we are not sure what it is, and need to wait for runtime to | ||
| // see if it is a null or not. (We've already handled the case where we | ||
| // can see the value is definitely a null at compile time, earlier.) | ||
| } | ||
|
|
||
| if (passOptions.ignoreImplicitTraps || passOptions.trapsNeverHappen) { | ||
| // A ref.cast traps when the RTTs do not line up, which can be of one of | ||
| // two sorts of issues: | ||
| // 1. The value being cast is not even a subtype of the cast type. In | ||
| // that case the RTTs trivially cannot indicate subtyping, because | ||
| // RTT subtyping is a subset of static subtyping. For example, maybe | ||
| // we are trying to cast a {i32} struct to an [f64] array. | ||
| // 2. The value is a subtype of the cast type, but the RTTs still do not | ||
| // fit. That indicates a difference between RTT subtyping and static | ||
| // subtyping. That is, the type may be right but the chain of rtt.subs | ||
| // is not. | ||
| // If we ignore a possible trap then we would like to assume that neither | ||
| // of those two situations can happen. However, we still cannot do | ||
| // anything if point 1 is a problem, that is, if the value is not a | ||
| // subtype of the cast type, as we can't remove the cast in that case - | ||
| // the wasm would not validate. But if the type *is* a subtype, then we | ||
| // can ignore a possible trap on 2 and remove it. | ||
| // | ||
| // We also do not do this if the arguments cannot be reordered. If we | ||
| // can't do that then we need to add a drop, at minimum (which may still | ||
| // be worthwhile, but depends on other optimizations kicking in, so it's | ||
| // not clearly worthwhile). | ||
| // Aside from the issue of type incompatibility as mentioned above, the | ||
| // cast can trap if the types *are* compatible but it happens to be the | ||
| // case at runtime that the value is not of the desired subtype. If we | ||
| // do not consider such traps possible, we can ignore that. Note, though, | ||
| // that we cannot do this if we cannot replace the current type with the | ||
| // reference's type. | ||
|
Comment on lines
+1345
to
+1350
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are traps possible even if
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is an unfortunate property of rtts. E.g. This is bad for optimization, so we are hoping to prototype a more static approach soon, that would be as you describe basically.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
😨 |
||
| if (HeapType::isSubType(curr->ref->type.getHeapType(), | ||
| curr->rtt->type.getHeapType()) && | ||
| canReorder(curr->ref, curr->rtt)) { | ||
| Builder builder(*getModule()); | ||
| replaceCurrent( | ||
| builder.makeSequence(builder.makeDrop(curr->rtt), curr->ref)); | ||
| curr->rtt->type.getHeapType())) { | ||
| replaceCurrent(getResultOfFirst(curr->ref, | ||
| builder.makeDrop(curr->rtt), | ||
| getFunction(), | ||
| getModule(), | ||
| passOptions)); | ||
| return; | ||
| } | ||
| } | ||
|
|
@@ -1358,6 +1415,18 @@ struct OptimizeInstructions | |
| } | ||
| } | ||
|
|
||
| void visitRefTest(RefTest* curr) { | ||
| // See above in RefCast. | ||
| if (!canBeCastTo(curr->ref->type.getHeapType(), | ||
| curr->rtt->type.getHeapType())) { | ||
| // This test cannot succeed, and will definitely return 0. | ||
| Builder builder(*getModule()); | ||
| replaceCurrent(builder.makeBlock({builder.makeDrop(curr->ref), | ||
| builder.makeDrop(curr->rtt), | ||
| builder.makeConst(int32_t(0))})); | ||
| } | ||
| } | ||
|
|
||
| void visitRefIs(RefIs* curr) { | ||
| if (curr->type == Type::unreachable) { | ||
| return; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,7 +4,6 @@ | |
| (module | ||
| ;; CHECK: (type $none_=>_none (func)) | ||
| (type $none_=>_none (func)) | ||
| ;; CHECK: (type $none_=>_i32 (func (result i32))) | ||
| (type $none_=>_i32 (func (result i32))) | ||
| ;; CHECK: (func $0 | ||
| ;; CHECK-NEXT: (nop) | ||
|
|
@@ -15,10 +14,7 @@ | |
| ;; CHECK: (func $1 | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (call_ref | ||
| ;; CHECK-NEXT: (ref.cast | ||
| ;; CHECK-NEXT: (ref.func $0) | ||
| ;; CHECK-NEXT: (rtt.canon $none_=>_i32) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (unreachable) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to change something in the comment below? |
||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
|
|
@@ -28,7 +24,9 @@ | |
| ;; where it inlines, for efficiency). As part of the optimiziations, we will | ||
| ;; try to precompute the cast here, which will try to look up $0. We should | ||
| ;; not hit an assertion, rather we should skip precomputing it, the same as if | ||
| ;; we were optimizing $1 before $0 were added to the module. | ||
| ;; we were optimizing $1 before $0 were added to the module. (In fact, we will | ||
| ;; be able to see that the cast cannot succeed, and will optimize it into an | ||
| ;; unreachable.) | ||
| (call $0) | ||
| (drop | ||
| (call_ref | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -143,18 +143,24 @@ | |
|
|
||
| ;; CHECK: (func $fallthrough-bad-type (result i32) | ||
| ;; CHECK-NEXT: (call_ref | ||
| ;; CHECK-NEXT: (ref.cast | ||
| ;; CHECK-NEXT: (ref.func $return-nothing) | ||
| ;; CHECK-NEXT: (rtt.canon $none_=>_i32) | ||
| ;; CHECK-NEXT: (block | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (ref.func $return-nothing) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (drop | ||
| ;; CHECK-NEXT: (rtt.canon $none_=>_i32) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: (unreachable) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here too; does the comment need changing?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, thanks, I'm updating these comments too. |
||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
| ;; CHECK-NEXT: ) | ||
| (func $fallthrough-bad-type (result i32) | ||
| ;; A fallthrough appears here, and we cast the function type to something else | ||
| ;; in a way that is bad: the actual target function has a different return | ||
| ;; type than the cast type. The cast will fail at runtime, and we should not | ||
| ;; type than the cast type. The cast will definitely fail, and we should not | ||
| ;; emit non-validating code here, which would happen if we replace the | ||
| ;; call_ref that returns nothing with a call that returns an i32. | ||
| ;; call_ref that returns nothing with a call that returns an i32. In fact, we | ||
| ;; end up optimizing the cast into an unreachable. | ||
| (call_ref | ||
| (ref.cast | ||
| (ref.func $return-nothing) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be helpful to add a comment that
Reorderercan create locals so we need thisThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point, adding a comment.