Skip to content

Commit f4c038b

Browse files
committed
Fix changes to stackmaps made by the register allocator.
The register allocator may insert spills and spill reloads around function calls. Unfortunately, it doesn't take stackmap instructions into account, so spill reloads may be placed in between a call and the stackmap attached to it. This changes the offset of the stackmap and the variables it tracks, which is information we rely upon for deoptimisation. This adds a new pass which "reverts" these changes, by moving the stackmap instruction back to right below the call, and updates its operands (i.e. tracked variables) as appropriate.
1 parent 9fdf6af commit f4c038b

File tree

8 files changed

+326
-1
lines changed

8 files changed

+326
-1
lines changed

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -571,6 +571,11 @@ namespace llvm {
571571
/// caller saved registers with stack slots.
572572
extern char &FixupStatepointCallerSavedID;
573573

574+
/// This pass fixes stackmaps by moving the STACKMAP instruction back to its
575+
/// pre-regalloc location, and reverting its operands back to the original
576+
/// values (before spill reloads).
577+
extern char &FixStackmapsSpillReloadsID;
578+
574579
/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
575580
/// or split the data to two <128 x i32>.
576581
FunctionPass *createX86LowerAMXTypePass();

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ void initializeFinalizeISelPass(PassRegistry&);
139139
void initializeFinalizeMachineBundlesPass(PassRegistry&);
140140
void initializeFixIrreduciblePass(PassRegistry &);
141141
void initializeFixupStatepointCallerSavedPass(PassRegistry&);
142+
void initializeFixStackmapsSpillReloadsPass(PassRegistry&);
142143
void initializeFlattenCFGLegacyPassPass(PassRegistry &);
143144
void initializeFloat2IntLegacyPassPass(PassRegistry&);
144145
void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,7 @@ add_llvm_component_library(LLVMCodeGen
216216
StackFrameLayoutAnalysisPass.cpp
217217
StackMapLivenessAnalysis.cpp
218218
StackMaps.cpp
219+
Yk/FixStackmapsSpillReloads.cpp
219220
StackProtector.cpp
220221
StackSlotColoring.cpp
221222
SwiftErrorValueTracking.cpp

llvm/lib/CodeGen/CodeGen.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
5454
initializeFinalizeISelPass(Registry);
5555
initializeFinalizeMachineBundlesPass(Registry);
5656
initializeFixupStatepointCallerSavedPass(Registry);
57+
initializeFixStackmapsSpillReloadsPass(Registry);
5758
initializeFuncletLayoutPass(Registry);
5859
initializeGCMachineCodeAnalysisPass(Registry);
5960
initializeGCModuleInfoPass(Registry);

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@
5656

5757
using namespace llvm;
5858

59+
extern bool YkStackmapsSpillReloadsFix;
60+
5961
static cl::opt<bool>
6062
EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
6163
cl::desc("Enable interprocedural register allocation "
@@ -1230,6 +1232,13 @@ void TargetPassConfig::addMachinePasses() {
12301232
// Expand pseudo instructions before second scheduling pass.
12311233
addPass(&ExpandPostRAPseudosID);
12321234

1235+
// Add pass to revert stackmap instructions altered by register allocation.
1236+
// We need to insert this pass late so that spill offsets will have been
1237+
// calculated.
1238+
if (YkStackmapsSpillReloadsFix) {
1239+
addPass(&FixStackmapsSpillReloadsID);
1240+
}
1241+
12331242
// Run pre-sched2 passes.
12341243
addPreSched2();
12351244

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
//===-- FixStackmapsSpillReloads.cpp - Fix spills before stackmaps --------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass fixes stackmaps with regard to spill reloads inserted by the
10+
// register allocator. For example, if we have the LLVM IR
11+
//
12+
// call foo($10, $11)
13+
// call llvm.experimental.stackmap(1, 0, $8, $9)
14+
//
15+
// After register allocation we might get something like
16+
//
17+
// movrr $rbx, $rsi
18+
// movmr $rbp, -8, $rdi
19+
// ...
20+
// call foo($rsi, $rdi)
21+
// movrr $rsi, $rbx
22+
// movrm $rdi, $rbp, -8
23+
// STACKMAP $rsi, $rdi
24+
//
25+
// In order to pass arguments to foo, the register allocator had to spill the
26+
// values in $rdi and $rsi into another register or onto the stack before the
27+
// call. Then immediately after the call it inserted instructions to reload
28+
// the spilled values back into the original registers. Since during
29+
// deoptimisation we return to immediately after the call, the stackmap is now
30+
// tracking the wrong values, e.g. in this case $rdi and $rsi instead of the
31+
// spill locations.
32+
//
33+
// This pass iterates over all basic blocks, finds spill reloads inserted
34+
// in between a call and stackmap, replaces the stackmap operands with the
35+
// spill reloads, and then moves the stackmap instruction up just below the
36+
// call.
37+
//===----------------------------------------------------------------------===//
38+
39+
#include "llvm/CodeGen/MachineBasicBlock.h"
40+
#include "llvm/CodeGen/MachineFrameInfo.h"
41+
#include "llvm/CodeGen/MachineFunction.h"
42+
#include "llvm/CodeGen/MachineFunctionPass.h"
43+
#include "llvm/CodeGen/MachineInstr.h"
44+
#include "llvm/CodeGen/MachineInstrBuilder.h"
45+
#include "llvm/CodeGen/MachineOperand.h"
46+
#include "llvm/CodeGen/Passes.h"
47+
#include "llvm/CodeGen/StackMaps.h"
48+
#include "llvm/CodeGen/TargetInstrInfo.h"
49+
#include "llvm/IR/DebugLoc.h"
50+
#include "llvm/InitializePasses.h"
51+
#include "llvm/Support/Debug.h"
52+
53+
using namespace llvm;
54+
55+
#define DEBUG_TYPE "fix-stackmaps-spill-reloads"
56+
57+
namespace {
58+
59+
class FixStackmapsSpillReloads : public MachineFunctionPass {
60+
public:
61+
static char ID;
62+
63+
FixStackmapsSpillReloads() : MachineFunctionPass(ID) {
64+
initializeFixStackmapsSpillReloadsPass(*PassRegistry::getPassRegistry());
65+
}
66+
67+
void getAnalysisUsage(AnalysisUsage &AU) const override {
68+
AU.setPreservesCFG();
69+
MachineFunctionPass::getAnalysisUsage(AU);
70+
}
71+
72+
StringRef getPassName() const override {
73+
return "Stackmaps Fix Post RegAlloc Pass";
74+
}
75+
76+
bool runOnMachineFunction(MachineFunction &MF) override;
77+
};
78+
79+
} // namespace
80+
81+
char FixStackmapsSpillReloads::ID = 0;
82+
char &llvm::FixStackmapsSpillReloadsID = FixStackmapsSpillReloads::ID;
83+
84+
INITIALIZE_PASS_BEGIN(FixStackmapsSpillReloads, DEBUG_TYPE, "Fixup Stackmap Spills",
85+
false, false)
86+
INITIALIZE_PASS_END(FixStackmapsSpillReloads, DEBUG_TYPE, "Fixup Stackmap Spills",
87+
false, false)
88+
89+
90+
/// Re-attach every STACKMAP in \p MF to the call that precedes it.
///
/// Each basic block is walked once. After a (non-stackmap, non-patchpoint)
/// call, every register copy or stack-slot load is remembered as a potential
/// spill reload, keyed by the register it writes. When the following STACKMAP
/// is reached, a new STACKMAP is assembled whose register operands are
/// replaced by the location the value was reloaded from; the new instruction
/// is inserted directly after the call and the old one is queued for erasure.
///
/// \param MF the machine function to rewrite in place.
/// \returns true if at least one STACKMAP was rebuilt.
bool FixStackmapsSpillReloads::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  for (MachineBasicBlock &MBB : MF) {
    // True while we are between a call and the STACKMAP that follows it.
    bool Collect = false;
    // Old STACKMAPs. They are erased only after the walk so that the
    // instruction we are currently visiting is never removed under our feet.
    std::set<MachineInstr *> Erased;
    MachineInstr *LastCall = nullptr;
    // Reloaded register -> the instruction that reloads it.
    std::map<Register, MachineInstr *> Spills;
    for (MachineInstr &MI : MBB) {
      if (MI.isCall() && !MI.isInlineAsm()) {
        // YKFIXME: Do we need to check for intrinsics here or have they been
        // removed during lowering?
        if (MI.getOpcode() != TargetOpcode::STACKMAP &&
            MI.getOpcode() != TargetOpcode::PATCHPOINT) {
          // If we see a normal function call we know it will be followed by a
          // STACKMAP instruction. Set `Collect` to `true` to collect all spill
          // reload instructions between this call and the STACKMAP
          // instruction. Also remember this call, so we can insert the new
          // STACKMAP instruction right below it.
          Collect = true;
          LastCall = &MI;
          Spills.clear();
          continue;
        }
      }

      if (MI.getOpcode() == TargetOpcode::STACKMAP) {
        if (LastCall == nullptr) {
          // There wasn't a call preceding this stackmap, so this must be
          // attached to a branch instruction.
          continue;
        }
        Collect = false;
        // Assemble a new stackmap instruction by copying over the operands of
        // the old instruction to the new one, while replacing spilled operands
        // as we go.
        MachineInstr *NewMI = MF.CreateMachineInstr(
            TII->get(TargetOpcode::STACKMAP), MI.getDebugLoc(), true);
        MachineInstrBuilder MIB(MF, NewMI);
        auto *MOI = MI.operands_begin();
        auto *const MOE = MI.operands_end();
        // Copy ID and shadow.
        MIB.add(*MOI); // ID
        ++MOI;
        MIB.add(*MOI); // Shadow
        ++MOI;
        while (MOI != MOE) {
          if (MOI->isReg()) {
            // Check if the register operand in the stackmap is a restored
            // spill.
            auto Reload = Spills.find(MOI->getReg());
            if (Reload == Spills.end()) {
              MIB.add(*MOI);
            } else {
              MachineInstr *SMI = Reload->second;
              int FI;
              if (auto Copy = TII->isCopyInstr(*SMI)) {
                // If the reload is a simple copy, e.g. $rax = $rbx, just
                // replace the stackmap operand with the source of the copy
                // instruction, as reported by the target.
                MIB.add(*Copy->Source);
              } else if (TII->isLoadFromStackSlotPostFE(*SMI, FI)) {
                // If the reload is a load from the stack, replace the operand
                // with multiple operands describing a stack location.
                MIB.addImm(StackMaps::IndirectMemRefOp);
                std::optional<unsigned> Size = SMI->getRestoreSize(TII);
                assert(Size.has_value() && "RestoreSize has no value.");
                MIB.addImm(Size.value()); // Size
                // NOTE(review): operand indices 1 and 4 presumably follow the
                // X86 memory-operand layout (base register ... displacement);
                // confirm before enabling this pass on another target.
                MIB.add(SMI->getOperand(1)); // Register
                MIB.add(SMI->getOperand(4)); // Offset
              } else {
                // Only copies and stack-slot loads are ever put into `Spills`.
                llvm_unreachable("Unknown instruction found");
              }
            }
            ++MOI;
            continue;
          }
          // Copy all other operands over as is.
          MIB.add(*MOI);
          if (!MOI->isImm()) {
            // Not a location marker (and not a register): nothing follows
            // that belongs to it, so move on without interpreting it.
            ++MOI;
            continue;
          }
          // An immediate at this position is a location marker; copy the
          // payload operands that belong to it so they are never mistaken
          // for markers themselves.
          switch (MOI->getImm()) {
          default:
            llvm_unreachable("Unrecognized operand type.");
          case StackMaps::DirectMemRefOp: {
            ++MOI;
            MIB.add(*MOI); // Register
            ++MOI;
            MIB.add(*MOI); // Offset
            break;
          }
          case StackMaps::IndirectMemRefOp: {
            ++MOI;
            MIB.add(*MOI); // Size
            ++MOI;
            MIB.add(*MOI); // Register
            ++MOI;
            MIB.add(*MOI); // Offset
            break;
          }
          case StackMaps::ConstantOp: {
            // The marker is followed by the constant itself. Consume it here;
            // otherwise its value would be read as a marker next iteration.
            ++MOI;
            assert(MOI != MOE && MOI->isImm() && "Expected constant operand.");
            MIB.add(*MOI); // Value
            break;
          }
          case StackMaps::NextLive: {
            break;
          }
          }
          ++MOI;
        }
        // Insert the new stackmap instruction just after the last call.
        MI.getParent()->insertAfter(LastCall, NewMI);
        // Remember the old stackmap instruction for deletion later.
        Erased.insert(&MI);
        LastCall = nullptr;
        Changed = true;
      }

      // Collect spill reloads that appear between a call and its corresponding
      // STACKMAP instruction.
      if (Collect) {
        int FI;
        if (auto Copy = TII->isCopyInstr(MI)) {
          Spills[Copy->Destination->getReg()] = &MI;
        } else if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
          Spills[MI.getOperand(0).getReg()] = &MI;
        }
      }
    }
    // Remove old stackmap instructions.
    for (MachineInstr *E : Erased) {
      E->eraseFromParent();
    }
  }

  return Changed;
}

llvm/lib/Support/Yk.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,19 @@ bool YkStackMapOffsetFix;
2323
static cl::opt<bool, true> YkStackMapOffsetFixParser(
2424
"yk-stackmap-offset-fix",
2525
cl::desc("Apply a fix to stackmaps that corrects the reported instruction "
26-
"offset in the presence of calls."),
26+
"offset in the presence of calls. (deprecated by "
27+
"yk-stackmap-spillreloads-fix)"),
2728
cl::NotHidden, cl::location(YkStackMapOffsetFix));
2829

2930
bool YkStackMapAdditionalLocs;
3031
static cl::opt<bool, true> YkStackMapAdditionalLocsParser(
3132
"yk-stackmap-add-locs",
3233
cl::desc("Encode additional locations for registers into stackmaps."),
3334
cl::NotHidden, cl::location(YkStackMapAdditionalLocs));
35+
36+
bool YkStackmapsSpillReloadsFix;
37+
static cl::opt<bool, true> YkStackMapSpillFixParser(
38+
"yk-stackmap-spillreloads-fix",
39+
cl::desc("Revert stackmaps and its operands after the register allocator "
40+
"has emitted spill reloads."),
41+
cl::NotHidden, cl::location(YkStackmapsSpillReloadsFix));
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; RUN: llc -stop-after fix-stackmaps-spill-reloads --yk-stackmap-spillreloads-fix < %s | FileCheck %s
2+
3+
; CHECK-LABEL: name: main
4+
; CHECK-LABEL: bb.0 (%ir-block.1):
5+
; CHECK-LABEL: CALL64pcrel32 target-flags(x86-plt) @foo2,
6+
; CHECK-NEXT: STACKMAP 1, 0, renamable $ebx, 3, renamable $r14d, 3, 1, 4, $rbp, -48, 3, renamable $r12d, 3, 1, 4, $rbp, -52, 3, renamable $r15d, 3, renamable $r13d, 3, implicit-def dead early-clobber $r11
7+
8+
@.str = private unnamed_addr constant [13 x i8] c"%d %d %d %d\0A\00", align 1
9+
10+
define dso_local i32 @foo(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5, i32 noundef %6) #0 {
11+
%8 = alloca i32, align 4
12+
%9 = alloca i32, align 4
13+
%10 = alloca i32, align 4
14+
%11 = alloca i32, align 4
15+
%12 = alloca i32, align 4
16+
%13 = alloca i32, align 4
17+
%14 = alloca i32, align 4
18+
%15 = alloca i32, align 4
19+
store i32 %0, ptr %9, align 4
20+
store i32 %1, ptr %10, align 4
21+
store i32 %2, ptr %11, align 4
22+
store i32 %3, ptr %12, align 4
23+
store i32 %4, ptr %13, align 4
24+
store i32 %5, ptr %14, align 4
25+
store i32 %6, ptr %15, align 4
26+
%16 = load i32, ptr %9, align 4
27+
%17 = load i32, ptr %10, align 4
28+
%18 = load i32, ptr %11, align 4
29+
%19 = load i32, ptr %12, align 4
30+
%20 = load i32, ptr %13, align 4
31+
%21 = load i32, ptr %14, align 4
32+
%22 = load i32, ptr %15, align 4
33+
%23 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %16, i32 noundef %17, i32 noundef %18, i32 noundef %19, i32 noundef %20, i32 noundef %21, i32 noundef %22)
34+
%24 = load i32, ptr %8, align 4
35+
ret i32 %24
36+
}
37+
38+
declare i32 @printf(ptr noundef, ...) #2
39+
40+
define dso_local i32 @main(i32 noundef %0) #0 {
41+
%2 = alloca i32, align 4
42+
%3 = alloca i32, align 4
43+
%4 = alloca i32, align 4
44+
%5 = alloca i32, align 4
45+
%6 = alloca i32, align 4
46+
%7 = alloca i32, align 4
47+
%8 = alloca i32, align 4
48+
%9 = alloca i32, align 4
49+
store i32 %0, ptr %2, align 4
50+
%10 = load i32, ptr %2, align 4
51+
%11 = mul nsw i32 %10, 1
52+
store i32 %11, ptr %3, align 4
53+
%12 = load i32, ptr %2, align 4
54+
%13 = mul nsw i32 %12, 2
55+
store i32 %13, ptr %4, align 4
56+
%14 = load i32, ptr %2, align 4
57+
%15 = mul nsw i32 %14, 3
58+
store i32 %15, ptr %5, align 4
59+
%16 = load i32, ptr %2, align 4
60+
%17 = mul nsw i32 %16, 4
61+
store i32 %17, ptr %6, align 4
62+
%18 = load i32, ptr %2, align 4
63+
%19 = mul nsw i32 %18, 5
64+
store i32 %19, ptr %7, align 4
65+
%20 = load i32, ptr %2, align 4
66+
%21 = mul nsw i32 %20, 6
67+
store i32 %21, ptr %8, align 4
68+
%22 = load i32, ptr %2, align 4
69+
%23 = mul nsw i32 %22, 7
70+
store i32 %23, ptr %9, align 4
71+
%24 = call i32 @foo2(i32 noundef %23, i32 noundef %21, i32 noundef %19, i32 noundef %17, i32 noundef %15, i32 noundef %13, i32 noundef %11)
72+
call void (i64, i32, ...) @llvm.experimental.stackmap(i64 1, i32 0, i32 %11, i32 %13, i32 %15, i32 %17, i32 %19, i32 %21, i32 %23)
73+
%25 = mul nsw i32 %23, 5
74+
%26 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %11, i32 noundef %13, i32 noundef %15, i32 noundef %17, i32 noundef %19, i32 noundef %21, i32 noundef %25)
75+
ret i32 0
76+
}
77+
78+
declare void @foo2(...)
79+
declare void @llvm.experimental.stackmap(i64, i32, ...)
80+
81+
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
82+
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
83+
attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }

0 commit comments

Comments
 (0)