Skip to content

Commit 203dcd5

Browse files
authored
[NFC-ish] Remove LocalGraph from LocalSubtyping (#6921)
The LocalGraph there was used for two purposes: 1. Get the list of gets and sets. 2. Get only the reachable gets and sets. It is trivial to get all the gets and sets in a much faster way, by just walking the code as this PR does. The downside is that we also consider unreachable gets and sets, so unreachable code can prevent us from optimizing, but that seems worthwhile as many passes make that assumption (and they all become maximally effective after --dce). That is the only non-NFC part here. Removing LocalGraph + the fixup code for unreachability makes this significantly shorter, and also 2-3x faster.
1 parent 2467e70 commit 203dcd5

File tree

2 files changed

+48
-86
lines changed

2 files changed

+48
-86
lines changed

src/passes/LocalSubtyping.cpp

Lines changed: 42 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -50,32 +50,49 @@ struct LocalSubtyping : public WalkerPass<PostWalker<LocalSubtyping>> {
5050
return;
5151
}
5252

53-
auto numLocals = func->getNumLocals();
53+
// Compute the list of gets and sets for each local.
54+
struct Scanner : public PostWalker<Scanner> {
55+
// Which locals are relevant for us (we can ignore non-references).
56+
std::vector<bool> relevant;
57+
58+
// The lists of gets and sets.
59+
std::vector<std::vector<LocalSet*>> setsForLocal;
60+
std::vector<std::vector<LocalGet*>> getsForLocal;
61+
62+
Scanner(Function* func) {
63+
auto numLocals = func->getNumLocals();
64+
relevant.resize(numLocals);
65+
setsForLocal.resize(numLocals);
66+
getsForLocal.resize(numLocals);
67+
68+
for (Index i = 0; i < numLocals; i++) {
69+
// TODO: Ignore params here? That may require changes below.
70+
if (func->getLocalType(i).isRef()) {
71+
relevant[i] = true;
72+
}
73+
}
5474

55-
// Compute the local graph. We need to get the list of gets and sets for
56-
// each local, so that we can do the analysis. For non-nullable locals, we
57-
// also need to know when the default value of a local is used: if so then
58-
// we cannot change that type, as if we change the local type to
59-
// non-nullable then we'd be accessing the default, which is not allowed.
60-
//
61-
// TODO: Optimize this, as LocalGraph computes more than we need, and on
62-
// more locals than we need.
63-
LocalGraph localGraph(func, getModule());
64-
65-
// For each local index, compute all the the sets and gets.
66-
std::vector<std::vector<LocalSet*>> setsForLocal(numLocals);
67-
std::vector<std::vector<LocalGet*>> getsForLocal(numLocals);
68-
69-
for (auto& [curr, _] : localGraph.locations) {
70-
if (auto* set = curr->dynCast<LocalSet>()) {
71-
setsForLocal[set->index].push_back(set);
72-
} else {
73-
auto* get = curr->cast<LocalGet>();
74-
getsForLocal[get->index].push_back(get);
75+
walk(func->body);
7576
}
76-
}
7777

78-
// Find which vars can be non-nullable.
78+
void visitLocalGet(LocalGet* curr) {
79+
if (relevant[curr->index]) {
80+
getsForLocal[curr->index].push_back(curr);
81+
}
82+
}
83+
84+
void visitLocalSet(LocalSet* curr) {
85+
if (relevant[curr->index]) {
86+
setsForLocal[curr->index].push_back(curr);
87+
}
88+
}
89+
} scanner(func);
90+
91+
auto& setsForLocal = scanner.setsForLocal;
92+
auto& getsForLocal = scanner.getsForLocal;
93+
94+
// Find which vars can be non-nullable (if a null is written, or the default
95+
// null is used, then a local cannot become non-nullable).
7996
std::unordered_set<Index> cannotBeNonNullable;
8097

8198
// All gets must be dominated structurally by sets for the local to be non-
@@ -98,7 +115,8 @@ struct LocalSubtyping : public WalkerPass<PostWalker<LocalSubtyping>> {
98115
// TODO: handle cycles of X -> Y -> X etc.
99116

100117
bool more;
101-
bool optimized = false;
118+
119+
auto numLocals = func->getNumLocals();
102120

103121
do {
104122
more = false;
@@ -148,7 +166,6 @@ struct LocalSubtyping : public WalkerPass<PostWalker<LocalSubtyping>> {
148166
assert(Type::isSubType(newType, oldType));
149167
func->vars[i - varBase] = newType;
150168
more = true;
151-
optimized = true;
152169

153170
// Update gets and tees.
154171
for (auto* get : getsForLocal[i]) {
@@ -166,50 +183,6 @@ struct LocalSubtyping : public WalkerPass<PostWalker<LocalSubtyping>> {
166183
}
167184
}
168185
} while (more);
169-
170-
// If we ever optimized, then we also need to do a final pass to update any
171-
// unreachable gets and tees. They are not seen or updated in the above
172-
// analysis, but must be fixed up for validation to work.
173-
if (optimized) {
174-
for (auto* get : FindAll<LocalGet>(func->body).list) {
175-
get->type = func->getLocalType(get->index);
176-
}
177-
for (auto* set : FindAll<LocalSet>(func->body).list) {
178-
auto newType = func->getLocalType(set->index);
179-
if (set->isTee()) {
180-
set->type = newType;
181-
set->finalize();
182-
}
183-
184-
// If this set was not processed earlier - that is, if it is in
185-
// unreachable code - then it may have an incompatible type. That is,
186-
// If we saw a reachable set that writes type A, and this set writes
187-
// type B, we may have specialized the local type to A, but the value
188-
// of type B in this unreachable set is no longer valid to write to
189-
// that local. In such a case we must do additional work.
190-
if (!Type::isSubType(set->value->type, newType)) {
191-
// The type is incompatible. To fix this, replace
192-
//
193-
// (set (bad-value))
194-
//
195-
// with
196-
//
197-
// (set (block
198-
// (drop (bad-value))
199-
// (unreachable)
200-
// ))
201-
//
202-
// (We cannot just ignore the bad value, as it may contain a break to
203-
// a target that is necessary for validation.)
204-
Builder builder(*getModule());
205-
set->value = builder.makeSequence(builder.makeDrop(set->value),
206-
builder.makeUnreachable());
207-
}
208-
}
209-
210-
// Also update their parents.
211-
ReFinalize().walkFunctionInModule(func, getModule());
212-
}
213186
}
214187
};
215188

test/lit/passes/local-subtyping.wast

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -397,28 +397,18 @@
397397
)
398398

399399
;; CHECK: (func $incompatible-sets (type $1) (result i32)
400-
;; CHECK-NEXT: (local $temp (ref $1))
400+
;; CHECK-NEXT: (local $temp (ref null $1))
401401
;; CHECK-NEXT: (local.set $temp
402402
;; CHECK-NEXT: (ref.func $incompatible-sets)
403403
;; CHECK-NEXT: )
404404
;; CHECK-NEXT: (unreachable)
405405
;; CHECK-NEXT: (drop
406406
;; CHECK-NEXT: (local.tee $temp
407-
;; CHECK-NEXT: (block
408-
;; CHECK-NEXT: (drop
409-
;; CHECK-NEXT: (ref.null nofunc)
410-
;; CHECK-NEXT: )
411-
;; CHECK-NEXT: (unreachable)
412-
;; CHECK-NEXT: )
407+
;; CHECK-NEXT: (ref.null nofunc)
413408
;; CHECK-NEXT: )
414409
;; CHECK-NEXT: )
415-
;; CHECK-NEXT: (local.tee $temp
416-
;; CHECK-NEXT: (block
417-
;; CHECK-NEXT: (drop
418-
;; CHECK-NEXT: (ref.null nofunc)
419-
;; CHECK-NEXT: )
420-
;; CHECK-NEXT: (unreachable)
421-
;; CHECK-NEXT: )
410+
;; CHECK-NEXT: (local.set $temp
411+
;; CHECK-NEXT: (ref.null nofunc)
422412
;; CHECK-NEXT: )
423413
;; CHECK-NEXT: (unreachable)
424414
;; CHECK-NEXT: )
@@ -431,9 +421,8 @@
431421
;; Make all code unreachable from here.
432422
(unreachable)
433423
;; In unreachable code, assign values that are not compatible with the more
434-
;; specific type we will optimize to. Those cannot be left as they are, and
435-
;; will be fixed up so that they validate. (All we need is validation, as
436-
;; their contents do not matter, given they are not reached.)
424+
;; specific type we will optimize to. This prevents optimization here (we
425+
;; will optimize better after --dce is run).
437426
(drop
438427
(local.tee $temp
439428
(ref.null func)

0 commit comments

Comments
 (0)