Skip to content

[ARM] Fix frame chains with M-profile PACBTI #110285

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,12 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_iOS_SaveList;

if (PushPopSplit == ARMSubtarget::SplitR7)
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_SaveList
return STI.createAAPCSFrameChain() ? CSR_AAPCS_SplitPush_R7_SaveList
: CSR_ATPCS_SplitPush_SaveList;

if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
return CSR_AAPCS_SplitPush_R11_SaveList;

return CSR_AAPCS_SaveList;
}

Expand Down
19 changes: 11 additions & 8 deletions llvm/lib/Target/ARM/ARMCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -301,14 +301,17 @@ def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
def CSR_ATPCS_SplitPush_SwiftTail : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
R10)>;

// When enforcing an AAPCS compliant frame chain, R11 is used as the frame
// pointer even for Thumb targets, where split pushes are necessary.
// This AAPCS alternative makes sure the frame index slots match the push
// order in that case.
def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R11,
R7, R6, R5, R4,
R10, R9, R8,
(sequence "D%u", 15, 8))>;
// Sometimes we need to split the push of the callee-saved GPRs into two
// regions, to ensure that the frame chain record is set up correctly. These
// list the callee-saved registers in the order they end up on the stack, which
// depends on whether the frame pointer is r7 or r11.
def CSR_AAPCS_SplitPush_R11 : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
LR, R11,
(sequence "D%u", 15, 8))>;
def CSR_AAPCS_SplitPush_R7 : CalleeSavedRegs<(add LR, R11,
R7, R6, R5, R4,
R10, R9, R8,
(sequence "D%u", 15, 8))>;

// Constructors and destructors return 'this' in the ARM C++ ABI; since 'this'
// and the pointer return value are both passed in R0 in these cases, this can
Expand Down
145 changes: 97 additions & 48 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ SpillArea getSpillArea(Register Reg,
// push {r0-r10, r12} GPRCS1
// vpush {r8-d15} DPRCS1
// push {r11, lr} GPRCS2
//
// SplitR11AAPCSSignRA:
// push {r0-r10, r12} GPRSC1
// push {r11, lr} GPRCS2
// vpush {r8-d15} DPRCS1

// If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
// the top of the stack frame.
Expand Down Expand Up @@ -246,7 +251,8 @@ SpillArea getSpillArea(Register Reg,
return SpillArea::GPRCS1;

case ARM::LR:
if (Variation == ARMSubtarget::SplitR11WindowsSEH)
if (Variation == ARMSubtarget::SplitR11WindowsSEH ||
Variation == ARMSubtarget::SplitR11AAPCSSignRA)
return SpillArea::GPRCS2;
else
return SpillArea::GPRCS1;
Expand Down Expand Up @@ -859,6 +865,9 @@ static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
int MaxRegBytes = 8 * 4;
if (PushPopSplit == ARMSubtarget::SplitR11AAPCSSignRA)
// Here, r11 can be stored below all of r4-r15.
MaxRegBytes = 11 * 4;
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
// Here, r11 can be stored below all of r4-r15 plus d8-d15.
MaxRegBytes = 11 * 4 + 8 * 8;
Expand Down Expand Up @@ -931,17 +940,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}

// Determine spill area sizes, and some important frame indices.
SpillArea FramePtrSpillArea;
bool BeforeFPPush = true;
for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
int FI = I.getFrameIdx();

if (Reg == FramePtr)
SpillArea Area = getSpillArea(Reg, PushPopSplit,
AFI->getNumAlignedDPRCS2Regs(), RegInfo);

if (Reg == FramePtr) {
FramePtrSpillFI = FI;
FramePtrSpillArea = Area;
}
if (Reg == ARM::D8)
D8SpillFI = FI;

switch (getSpillArea(Reg, PushPopSplit, AFI->getNumAlignedDPRCS2Regs(),
RegInfo)) {
switch (Area) {
case SpillArea::FPCXT:
FPCXTSaveSize += 4;
break;
Expand All @@ -968,21 +983,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move past FPCXT area.
if (FPCXTSaveSize > 0) {
LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, BeforeFPPush);
}

// Allocate the vararg register save area.
if (ArgRegsSaveSize) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
LastPush = std::prev(MBBI);
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, BeforeFPPush);
}

// Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS1)
BeforeFPPush = false;
}

// Determine starting offsets of spill areas. These offsets are all positive
Expand All @@ -1006,21 +1023,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
} else {
DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
}
int FramePtrOffsetInPush = 0;
if (HasFP) {
// Offset from the CFA to the saved frame pointer, will be negative.
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
<< ", FPOffset: " << FPOffset << "\n");
assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
"Max FP estimation is wrong");
// Offset from the top of the GPRCS1 area to the saved frame pointer, will
// be negative.
FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
LLVM_DEBUG(dbgs() << "FramePtrOffsetInPush=" << FramePtrOffsetInPush
<< ", FramePtrSpillOffset="
<< (MFI.getObjectOffset(FramePtrSpillFI) + NumBytes)
<< "\n");
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
Expand All @@ -1032,7 +1041,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// after DPRCS1.
if (GPRCS2Size > 0 && PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS2)
BeforeFPPush = false;
}

// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
Expand All @@ -1045,7 +1056,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
MachineInstr::FrameSetup);
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize, BeforeFPPush);
}
}

Expand All @@ -1054,7 +1065,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Since vpush register list cannot have gaps, there may be multiple vpush
// instructions in the prologue.
while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI),
BeforeFPPush);
LastPush = MBBI++;
}
}
Expand All @@ -1073,7 +1085,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// Move GPRCS2, if using using SplitR11WindowsSEH.
if (GPRCS2Size > 0 && PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
GPRCS2Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size, BeforeFPPush);
if (FramePtrSpillArea == SpillArea::GPRCS2)
BeforeFPPush = false;
}

bool NeedsWinCFIStackAlloc = NeedsWinCFI;
Expand Down Expand Up @@ -1174,28 +1188,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// into spill area 1, including the FP in R11. In either case, it
// is in area one and the adjustment needs to take place just after
// that push.
// FIXME: The above is not necessary true when PACBTI is enabled.
// AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
// so FP ends up on area two.
MachineBasicBlock::iterator AfterPush;
if (HasFP) {
AfterPush = std::next(GPRCS1Push);
unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
int FPOffset = PushSize + FramePtrOffsetInPush;
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH) {
AfterPush = std::next(GPRCS2Push);
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
} else {
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, FPOffset,
MachineInstr::FrameSetup);
MachineBasicBlock::iterator FPPushInst;
// Offset from SP immediately after the push which saved the FP to the FP
// save slot.
int64_t FPOffsetAfterPush;
switch (FramePtrSpillArea) {
case SpillArea::GPRCS1:
FPPushInst = GPRCS1Push;
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
ArgRegsSaveSize + FPCXTSaveSize +
sizeOfSPAdjustment(*FPPushInst);
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
<< FPOffsetAfterPush << " after that push\n");
break;
case SpillArea::GPRCS2:
FPPushInst = GPRCS2Push;
FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
sizeOfSPAdjustment(*FPPushInst);
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
FPOffsetAfterPush += DPRCSSize + DPRGapSize;
LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
<< FPOffsetAfterPush << " after that push\n");
break;
default:
llvm_unreachable("frame pointer in unknown spill area");
break;
}
AfterPush = std::next(FPPushInst);
if (PushPopSplit == ARMSubtarget::SplitR11WindowsSEH)
assert(FPOffsetAfterPush == 0);

// Emit the MOV or ADD to set up the frame pointer register.
emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
FramePtr, ARM::SP, FPOffsetAfterPush,
MachineInstr::FrameSetup);

if (!NeedsWinCFI) {
if (FramePtrOffsetInPush + PushSize != 0) {
// Emit DWARF info to find the CFA using the frame pointer from this
// point onward.
if (FPOffsetAfterPush != 0) {
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
-MFI.getObjectOffset(FramePtrSpillFI)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
Expand Down Expand Up @@ -1708,7 +1745,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
PushPopSplit != ARMSubtarget::SplitR11WindowsSEH) {
(PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
DeleteRet = true;
Expand Down Expand Up @@ -2940,18 +2978,29 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
if (AFI.shouldSignReturnAddress()) {
// The order of register must match the order we push them, because the
// PEI assigns frame indices in that order. When compiling for return
// address sign and authenication, we use split push, therefore the orders
// we want are:
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
CSI.insert(find_if(CSI,
[=](const auto &CS) {
Register Reg = CS.getReg();
return Reg == ARM::R10 || Reg == ARM::R11 ||
Reg == ARM::R8 || Reg == ARM::R9 ||
ARM::DPRRegClass.contains(Reg);
}),
CalleeSavedInfo(ARM::R12));
// PEI assigns frame indices in that order. That order depends on the
// PushPopSplitVariation, there are only two cases which we use with return
// address signing:
switch (STI.getPushPopSplitVariation(MF)) {
case ARMSubtarget::SplitR7:
// LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
CSI.insert(find_if(CSI,
[=](const auto &CS) {
Register Reg = CS.getReg();
return Reg == ARM::R10 || Reg == ARM::R11 ||
Reg == ARM::R8 || Reg == ARM::R9 ||
ARM::DPRRegClass.contains(Reg);
}),
CalleeSavedInfo(ARM::R12));
break;
case ARMSubtarget::SplitR11AAPCSSignRA:
// With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
// on the stack.
CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
break;
default:
llvm_unreachable("Unexpected CSR split with return address signing");
}
}

return false;
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -514,5 +514,12 @@ ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
F.needsUnwindTableEntry() &&
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;

// Returns R11SplitAAPCSBranchSigning if R11 and lr are not adjacent to each
// other in the list of callee saved registers in a frame, and branch
// signing is enabled.
if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
getFramePointerReg() == ARM::R11)
return SplitR11AAPCSSignRA;
return NoSplit;
}
12 changes: 12 additions & 0 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,18 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
/// vpush {d8-d15}
/// push {r11, lr}
SplitR11WindowsSEH,

/// When generating AAPCS-compilant frame chains, R11 is the frame pointer,
/// and must be pushed adjacent to the return address (LR). Normally this
/// isn't a problem, because the only register between them is r12, which is
/// the intra-procedure-call scratch register, so doesn't need to be saved.
/// However, when PACBTI is in use, r12 contains the authentication code, so
/// does need to be saved. This means that we need a separate push for R11
/// and LR.
/// push {r0-r10, r12}
/// push {r11, lr}
/// vpush {d8-d15}
SplitR11AAPCSSignRA,
};

protected:
Expand Down
Loading
Loading