Skip to content

Commit cad9d80

Browse files
committed
BoundsCheckOpts: merge OutputSpan.append capacity checks
Sequential `OutputSpan.append` calls each emitted their own `count < capacity` check, since the inline `_precondition` lowered to `cond_fail` with no semantic apply for `hoistFixedStorageBoundsChecksInBlock` to recognise, and successive loads of the OutputSpan struct produced distinct SSA selves that fragmented the merge group. Split the check into a `_checkCanAppend` helper tagged `@_semantics("fixed_storage.check_capacity")`. Teach the pass to look through `load` / `load_borrow` for the merge-group key when the call kind is `CheckCapacity`, and to substitute the first check's self when moving subsequent ones (sound because `capacity` is invariant; the helper reads no mutable fields). `CheckIndex` keying is unchanged so reads against mutable `_count` still behave correctly. Self substitution alone is unsound across instructions that could replace the storage at the merge-group address — most commonly an opaque `@inout` callee. Before merging a `CheckCapacity` call, scan the range from the previous clustered check to the candidate and bail if any intervening apply (other than a recognized fixed-storage semantic call) or whole-storage `store` / `copy_addr` lands on the same address. Field stores reach `struct_element_addr`-derived destinations and do not bail. Four sequential appends on the same `inout OutputSpan` now fold to one merged `cond_fail` covering all four indices, mirroring `span_4_sum`.
1 parent 09dfb9b commit cad9d80

7 files changed

Lines changed: 212 additions & 9 deletions

File tree

include/swift/AST/SemanticAttrs.def

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,11 @@ SEMANTICS_ATTR(DELETE_IF_UNUSED, "sil.optimizer.delete_if_unused")
165165
SEMANTICS_ATTR(USE_FRAME_POINTER, "use_frame_pointer")
166166

167167
SEMANTICS_ATTR(FIXED_STORAGE_CHECK_INDEX, "fixed_storage.check_index")
168+
// Like `fixed_storage.check_index` but validates against `capacity`, which
169+
// must be an invariant property of the storage. The callee must depend only
170+
// on `let` fields of self, so the optimizer may merge successive checks
171+
// across mutations of `self`.
172+
SEMANTICS_ATTR(FIXED_STORAGE_CHECK_CAPACITY, "fixed_storage.check_capacity")
168173
SEMANTICS_ATTR(FIXED_STORAGE_GET_COUNT, "fixed_storage.get_count")
169174

170175
SEMANTICS_ATTR(NO_SIL_VERIFICATION, "sil.verify_none")

include/swift/SIL/InstWrappers.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,17 @@ class ForwardingOperation {
387387
bool visitForwardedValues(function_ref<bool(SILValue)> visitor);
388388
};
389389

390-
enum class FixedStorageSemanticsCallKind { None, CheckIndex, GetCount };
390+
enum class FixedStorageSemanticsCallKind {
391+
None,
392+
CheckIndex,
393+
/// Check that an index is in `0..<capacity`. Distinct from `CheckIndex`
394+
/// because `capacity` is required to be invariant for the lifetime of the
395+
/// storage. The callee must depend only on `let` fields of self, so
396+
/// successive checks across mutations of `self` may be merged and the self
397+
/// argument can be substituted across them.
398+
CheckCapacity,
399+
GetCount
400+
};
391401

392402
struct FixedStorageSemanticsCall {
393403
ApplyInst *apply = nullptr;
@@ -407,6 +417,10 @@ struct FixedStorageSemanticsCall {
407417
apply = applyInst;
408418
kind = FixedStorageSemanticsCallKind::CheckIndex;
409419
break;
420+
} else if (attr == "fixed_storage.check_capacity") {
421+
apply = applyInst;
422+
kind = FixedStorageSemanticsCallKind::CheckCapacity;
423+
break;
410424
} else if (attr == "fixed_storage.get_count") {
411425
apply = applyInst;
412426
kind = FixedStorageSemanticsCallKind::GetCount;

lib/SIL/Utils/InstWrappers.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ bool ForwardingOperation::visitForwardedValues(
104104
bool swift::isFixedStorageSemanticsCallKind(SILFunction *function) {
105105
for (auto &attr : function->getSemanticsAttrs()) {
106106
if (attr == "fixed_storage.check_index" ||
107+
attr == "fixed_storage.check_capacity" ||
107108
attr == "fixed_storage.get_count") {
108109
return true;
109110
}

lib/SILOptimizer/LoopTransforms/BoundsCheckOpts.cpp

Lines changed: 92 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1231,10 +1231,47 @@ static bool canOptimize(FixedStorageSemanticsCall call) {
12311231
return true;
12321232
}
12331233

1234+
/// Whether a fixed-storage semantic call kind participates in
1235+
/// `hoistFixedStorageBoundsChecksInBlock`'s clustering.
1236+
static bool isMergeableCheckKind(FixedStorageSemanticsCallKind kind) {
1237+
switch (kind) {
1238+
case FixedStorageSemanticsCallKind::CheckIndex:
1239+
case FixedStorageSemanticsCallKind::CheckCapacity:
1240+
return true;
1241+
case FixedStorageSemanticsCallKind::None:
1242+
case FixedStorageSemanticsCallKind::GetCount:
1243+
return false;
1244+
}
1245+
}
1246+
1247+
/// Compute a merge-group key for a fixed-storage bounds check.
1248+
///
1249+
/// `CheckIndex` calls validate the index against `count`, which may be mutated
1250+
/// in place (e.g. `OutputSpan`). For those, the key must be the literal SSA
1251+
/// self value, so checks separated by mutations stay in distinct groups.
1252+
///
1253+
/// `CheckCapacity` calls validate against `capacity`, which is invariant. When
1254+
/// self is loaded from an addressable location (an inout, a stack slot), reach
1255+
/// through the load to the underlying address so successive loads of the same
1256+
/// storage merge.
1257+
static SILValue
1258+
getFixedStorageMergeKey(const FixedStorageSemanticsCall &call) {
1259+
auto self = call.getSelf();
1260+
if (call.getKind() == FixedStorageSemanticsCallKind::CheckCapacity) {
1261+
if (auto *inst = self->getDefiningInstruction()) {
1262+
LoadOperation load{inst};
1263+
if (load)
1264+
return load.getOperand();
1265+
}
1266+
}
1267+
return self;
1268+
}
1269+
12341270
bool BoundsCheckOpts::hoistFixedStorageBoundsChecksInBlock(SILBasicBlock &block) {
12351271
bool changed = false;
12361272

1237-
// Map to track fixed storage checks grouped by self value.
1273+
// Map to track fixed storage checks grouped by self value (or, for
1274+
// CheckCapacity, the underlying address self is loaded from).
12381275
llvm::MapVector<SILValue, SmallVector<FixedStorageSemanticsCall, 4>>
12391276
checksToMerge;
12401277

@@ -1245,8 +1282,7 @@ bool BoundsCheckOpts::hoistFixedStorageBoundsChecksInBlock(SILBasicBlock &block)
12451282
for (auto &inst : block) {
12461283
FixedStorageSemanticsCall fixedStorageCall(&inst);
12471284
if (!fixedStorageCall ||
1248-
fixedStorageCall.getKind() !=
1249-
FixedStorageSemanticsCallKind::CheckIndex ||
1285+
!isMergeableCheckKind(fixedStorageCall.getKind()) ||
12501286
fixedStorageCall->getNumArguments() < 2) {
12511287
continue;
12521288
}
@@ -1255,14 +1291,14 @@ bool BoundsCheckOpts::hoistFixedStorageBoundsChecksInBlock(SILBasicBlock &block)
12551291
continue;
12561292
}
12571293

1258-
auto selfValue = fixedStorageCall.getSelf();
1294+
auto mergeKey = getFixedStorageMergeKey(fixedStorageCall);
12591295
auto indexValue = fixedStorageCall.getIndex();
12601296

1261-
// Add this check to the list for the self value
1262-
checksToMerge[selfValue].push_back(fixedStorageCall);
1297+
// Add this check to the list for the merge key
1298+
checksToMerge[mergeKey].push_back(fixedStorageCall);
12631299

12641300
LLVM_DEBUG(llvm::dbgs() << " Found fixed storage check: " << inst
1265-
<< " with self: " << *selfValue
1301+
<< " with merge key: " << *mergeKey
12661302
<< " and index: " << *indexValue);
12671303
}
12681304

@@ -1278,16 +1314,57 @@ bool BoundsCheckOpts::hoistFixedStorageBoundsChecksInBlock(SILBasicBlock &block)
12781314
// Second pass: process each self group to place checks with same self next to
12791315
// each other, preserving their original relative order.
12801316
for (auto &entry : checksToMerge) {
1317+
auto mergeKey = entry.first;
12811318
auto &checks = entry.second;
12821319
if (checks.size() <= 1) {
12831320
continue;
12841321
}
12851322

12861323
SILInstruction *insertAfter = *checks[0];
1324+
SILValue firstSelf = checks[0].getSelf();
1325+
1326+
// For CheckCapacity, two checks with different SSA selves were grouped
1327+
// because they share an underlying address (mergeKey). Substituting
1328+
// `firstSelf` for the moved check is sound only if nothing between
1329+
// the previous clustered check and `check[i]` could have replaced the
1330+
// storage at that address — a whole-storage write changes invariant
1331+
// fields the substitution depends on (`capacity`).
1332+
auto canReplaceStorage = [&](SILInstruction *inst) {
1333+
// Any opaque call may replace storage reachable through its arguments.
1334+
// Recognized fixed-storage semantic calls are read-only by contract.
1335+
if (FullApplySite::isa(inst)) {
1336+
return !static_cast<bool>(FixedStorageSemanticsCall(inst));
1337+
}
1338+
// Whole-storage stores; field stores go through `struct_element_addr`
1339+
// and have a different destination SSA value.
1340+
if (auto *store = dyn_cast<StoreInst>(inst))
1341+
return store->getDest() == mergeKey;
1342+
if (auto *copy = dyn_cast<CopyAddrInst>(inst))
1343+
return copy->getDest() == mergeKey;
1344+
return false;
1345+
};
12871346

12881347
for (unsigned i = 1, e = checks.size(); i < e; ++i) {
12891348
auto check = checks[i];
12901349

1350+
if (check.getKind() == FixedStorageSemanticsCallKind::CheckCapacity) {
1351+
bool storageReplaced = false;
1352+
for (auto it = std::next(insertAfter->getIterator()),
1353+
end = check->getIterator();
1354+
it != end; ++it) {
1355+
if (canReplaceStorage(&*it)) {
1356+
storageReplaced = true;
1357+
break;
1358+
}
1359+
}
1360+
if (storageReplaced) {
1361+
LLVM_DEBUG(llvm::dbgs()
1362+
<< " Skipping merge across storage replacement: "
1363+
<< *check.apply);
1364+
continue;
1365+
}
1366+
}
1367+
12911368
auto indexValue = check.getIndex();
12921369
auto clonedValue =
12931370
cloneFixedStorageIndex(indexValue, *checks[0], instIndices);
@@ -1296,6 +1373,14 @@ bool BoundsCheckOpts::hoistFixedStorageBoundsChecksInBlock(SILBasicBlock &block)
12961373
}
12971374
check.getIndexOperand().set(*clonedValue);
12981375
check->getCalleeOperand()->set(checks[0]->getCallee());
1376+
// For CheckCapacity, self may be a fresh load following intervening
1377+
// stores; replace it with the first check's self so the moved apply
1378+
// does not reference a value defined later in the block. This is sound
1379+
// because capacity is invariant.
1380+
if (check.getKind() == FixedStorageSemanticsCallKind::CheckCapacity &&
1381+
check.getSelf() != firstSelf) {
1382+
check->getSelfArgumentOperand().set(firstSelf);
1383+
}
12991384
check->moveAfter(insertAfter);
13001385
insertAfter = *check;
13011386

stdlib/public/core/Span/OutputSpan.swift

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -292,13 +292,25 @@ extension OutputSpan where Element: ~Copyable {
292292
@available(SwiftCompatibilitySpan 5.0, *)
293293
@_originallyDefinedIn(module: "Swift;CompatibilitySpan", SwiftCompatibilitySpan 6.2)
294294
extension OutputSpan where Element: ~Copyable {
295+
// SILOptimizer looks for fixed_storage.check_capacity semantics for bounds
296+
// check optimizations. Distinct from `_checkIndex` because `capacity` is
297+
// invariant for the lifetime of the storage, so successive checks across
298+
// mutations of `self` may be merged. Only called with `_count`, which is
299+
// non-negative by invariant, so no lower-bound check is needed.
300+
@_semantics("fixed_storage.check_capacity")
301+
@inline(__always)
302+
@_alwaysEmitIntoClient
303+
internal func _checkCanAppend(_ index: Int) {
304+
_precondition(index < capacity, "OutputSpan capacity overflow")
305+
}
306+
295307
/// Append a single element to this span.
296308
///
297309
/// - Parameter value: The element to append.
298310
@_alwaysEmitIntoClient
299311
@_lifetime(self: copy self)
300312
public mutating func append(_ value: consuming Element) {
301-
_precondition(_count < capacity, "OutputSpan capacity overflow")
313+
_checkCanAppend(_count)
302314
unsafe _tail().initializeMemory(as: Element.self, to: value)
303315
_count &+= 1
304316
}

test/SILOptimizer/mutable_span_bounds_check_tests.swift

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,43 @@ public func outputspan_get_element(_ v: borrowing OutputSpan<Int>, _ i: Int) ->
180180
return v[i]
181181
}
182182

183+
// Sequential `append`s on the same `inout OutputSpan` should fold into a
184+
// single merged capacity check covering all four indices, instead of one
185+
// check per `append`. Mirrors the `span_4_sum` test for `Span` subscripts.
186+
//
187+
// CHECK-SIL-LABEL: sil @$s31mutable_span_bounds_check_tests19outputspan_4_appendyys10OutputSpanVys5UInt8VGz_A4FtF :
188+
// CHECK-SIL: cond_fail {{.*}}, "OutputSpan capacity overflow"
189+
// CHECK-SIL-NOT: cond_fail {{.*}}, "OutputSpan capacity overflow"
190+
// CHECK-SIL-LABEL: } // end sil function '$s31mutable_span_bounds_check_tests19outputspan_4_appendyys10OutputSpanVys5UInt8VGz_A4FtF'
191+
@_lifetime(output: copy output)
192+
public func outputspan_4_append(
193+
_ output: inout OutputSpan<UInt8>,
194+
_ a: UInt8, _ b: UInt8, _ c: UInt8, _ d: UInt8
195+
) {
196+
output.append(a); output.append(b); output.append(c); output.append(d)
197+
}
198+
199+
@inline(never)
200+
@_lifetime(output: copy output)
201+
internal func _mutateOutputSpan(_ output: inout OutputSpan<UInt8>) {}
202+
203+
// An opaque `@inout` callee between two `append`s could replace the storage,
204+
// so the second capacity check must NOT be merged with the first. Both
205+
// checks must remain.
206+
//
207+
// CHECK-SIL-LABEL: sil @$s31mutable_span_bounds_check_tests31outputspan_append_across_mutateyys10OutputSpanVys5UInt8VGz_A3FtF :
208+
// CHECK-SIL: cond_fail {{.*}}, "OutputSpan capacity overflow"
209+
// CHECK-SIL: function_ref{{.*}}_mutateOutputSpan
210+
// CHECK-SIL: cond_fail {{.*}}, "OutputSpan capacity overflow"
211+
// CHECK-SIL-LABEL: } // end sil function '$s31mutable_span_bounds_check_tests31outputspan_append_across_mutateyys10OutputSpanVys5UInt8VGz_A3FtF'
212+
@_lifetime(output: copy output)
213+
public func outputspan_append_across_mutate(
214+
_ output: inout OutputSpan<UInt8>,
215+
_ a: UInt8, _ b: UInt8, _ c: UInt8
216+
) {
217+
output.append(a)
218+
_mutateOutputSpan(&output)
219+
output.append(b)
220+
output.append(c)
221+
}
222+
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// RUN: %target-run-simple-swift(-O -enable-experimental-feature Lifetimes)
2+
// RUN: %target-run-simple-swift(-Onone -enable-experimental-feature Lifetimes)
3+
4+
// REQUIRES: executable_test
5+
// REQUIRES: swift_feature_Lifetimes
6+
7+
import StdlibUnittest
8+
9+
// Regression test for BoundsCheckOpts CheckCapacity merging. Two
10+
// `OutputSpan.append` capacity checks separated by an `@inout` callee that
11+
// fills the span must not be merged using the first call's loaded count.
12+
//
13+
// The bug: `getFixedStorageMergeKey` looks through `load_borrow` for
14+
// CheckCapacity, so two checks against the same `inout` span land in one
15+
// merge group. If `cloneFixedStorageIndex` clones the second check's index
16+
// (a fresh `load` of `_count`) back past the intervening callee, the cloned
17+
// load reads stale memory and the check passes when it should trap.
18+
19+
let suite = TestSuite("OutputSpan bounds check optimization")
20+
21+
@inline(never)
22+
func fillRemaining(_ span: inout OutputSpan<Int>) {
23+
while !span.isFull {
24+
span.append(0)
25+
}
26+
}
27+
28+
@inline(never)
29+
func appendAfterFull(_ span: inout OutputSpan<Int>) {
30+
span.append(1)
31+
fillRemaining(&span)
32+
// After `fillRemaining`, count == capacity. This append must trap.
33+
span.append(99)
34+
}
35+
36+
suite.test("append after inout callee fills span") {
37+
expectCrashLater()
38+
let buffer = UnsafeMutableBufferPointer<Int>.allocate(capacity: 2)
39+
defer { buffer.deallocate() }
40+
var span = unsafe OutputSpan(buffer: buffer, initializedCount: 0)
41+
appendAfterFull(&span)
42+
expectUnreachable("third append should have trapped on capacity overflow")
43+
_ = unsafe span.finalize(for: buffer)
44+
}
45+
46+
runAllTests()

0 commit comments

Comments
 (0)