Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8d48da0
WIP
jakobbotsch Apr 14, 2026
f1d98d1
WIP
jakobbotsch Apr 14, 2026
e3d29e7
WIP
jakobbotsch Apr 14, 2026
2179ae7
WIP
jakobbotsch Apr 14, 2026
448d7cd
Run jit-format
jakobbotsch Apr 14, 2026
10cff17
Fix
jakobbotsch Apr 14, 2026
27db1a7
Fix again
jakobbotsch Apr 14, 2026
05131c3
More hacking
jakobbotsch Apr 14, 2026
480d01e
Build break
jakobbotsch Apr 14, 2026
771df5f
Switch to restoring callee saves instead
jakobbotsch Apr 15, 2026
3474bd1
Clean up
jakobbotsch Apr 15, 2026
60aa1f8
Run jit-format
jakobbotsch Apr 15, 2026
ec1a088
Fix
jakobbotsch Apr 15, 2026
df8a012
Set FP for arm64
jakobbotsch Apr 15, 2026
a1e0380
Remove assert
jakobbotsch Apr 15, 2026
dbc2d70
Fix overriding FP during restore
jakobbotsch Apr 15, 2026
4410ba3
Clean up
jakobbotsch Apr 15, 2026
6dce58b
Implement for LA64/RV64
jakobbotsch Apr 15, 2026
d8bddb7
Clean up
jakobbotsch Apr 15, 2026
b7d705e
AltJit based fixes
jakobbotsch Apr 15, 2026
b18b0fc
Fix arm build
jakobbotsch Apr 15, 2026
20d1190
Feedback
jakobbotsch Apr 15, 2026
1e4df82
Fix build
jakobbotsch Apr 15, 2026
06e9e7f
Remove unnecessary phantom unwind
jakobbotsch Apr 16, 2026
05ffa2d
Partial compilation patchpoints
jakobbotsch Apr 16, 2026
bbde19c
Merge branch 'main' into osr-arm64-restore-from-tier0
jakobbotsch Apr 16, 2026
82ad389
Support frames without fp/lr saved with callee saves
jakobbotsch Apr 17, 2026
6d784e9
Fixes
jakobbotsch Apr 17, 2026
bd302fb
Remove SPMI hack
jakobbotsch Apr 17, 2026
87e6ce5
Feedback
jakobbotsch Apr 17, 2026
e1f8e92
A few fixes
jakobbotsch Apr 17, 2026
a41738f
Store in MethodDescCodeData
jakobbotsch Apr 20, 2026
0dd6951
Fix patchpoint info
jakobbotsch Apr 17, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 10 additions & 12 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,7 @@ class CodeGen final : public CodeGenInterface
void genClearStackVec3ArgUpperBits();
#endif // UNIX_AMD64_ABI && FEATURE_SIMD

void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed);
#if defined(TARGET_ARM64)
bool genInstrWithConstant(instruction ins,
emitAttr attr,
Expand All @@ -380,9 +381,11 @@ class CodeGen final : public CodeGenInterface
int spDelta,
bool useSaveNextPair,
regNumber tmpReg,
bool* pTmpRegIsZero);
bool* pTmpRegIsZero,
bool unwindOnly = false);

Comment thread
jakobbotsch marked this conversation as resolved.
void genPrologSaveReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
void genPrologSaveReg(
regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero, bool unwindOnly);

void genEpilogRestoreRegPair(regNumber reg1,
regNumber reg2,
Expand Down Expand Up @@ -422,14 +425,15 @@ class CodeGen final : public CodeGenInterface

static int genGetSlotSizeForRegsInMask(regMaskTP regsMask);

void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset);
void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset, bool unwindOnly = false);
void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset);

void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta);
void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask,
int lowestCalleeSavedOffset,
int spDelta,
bool unwindOnly = false);
void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta);

void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed);

#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
bool genInstrWithConstant(instruction ins,
emitAttr attr,
Expand All @@ -443,16 +447,10 @@ class CodeGen final : public CodeGenInterface

void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset);
void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset);
void genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed);

#else
void genPushCalleeSavedRegisters();
#endif

#if defined(TARGET_AMD64)
void genOSRRecordTier0CalleeSavedRegistersAndFrame();
void genOSRSaveRemainingCalleeSavedRegisters();
#endif // TARGET_AMD64

void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);

Expand Down
197 changes: 144 additions & 53 deletions src/coreclr/jit/codegenarm64.cpp

Large diffs are not rendered by default.

93 changes: 66 additions & 27 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4285,16 +4285,12 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, const ReturnTypeDesc* retTypeDesc
//------------------------------------------------------------------------
// genPushCalleeSavedRegisters: Push any callee-saved registers we have used.
//
// Arguments (arm64):
// Arguments:
// initReg - A scratch register (that gets set to zero on some platforms).
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'true' if this method sets initReg register to zero,
// 'false' if initReg was set to a non-zero value, and left unchanged if initReg was not touched.
//
#if defined(TARGET_ARM64)
void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
#else
void CodeGen::genPushCalleeSavedRegisters()
#endif
{
assert(m_compiler->compGeneratingProlog);

Expand Down Expand Up @@ -4346,6 +4342,12 @@ void CodeGen::genPushCalleeSavedRegisters()

regSet.rsMaskCalleeSaved = rsPushRegs;

if (m_compiler->opts.IsOSR())
{
PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo;
rsPushRegs &= ~ppi->CalleeSaveRegisters();
}

#ifdef DEBUG
if (m_compiler->compCalleeRegsPushed != genCountBits(rsPushRegs))
{
Expand Down Expand Up @@ -4481,14 +4483,15 @@ void CodeGen::genPushCalleeSavedRegisters()

// The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the
// first save instruction as a "predecrement" amount, if possible.
int calleeSaveSpDelta = 0;
int calleeSaveSpDelta = 0;
bool saveFplr = (maskSaveRegsInt & RBM_FP) != 0;

if (isFramePointerUsed())
{
// We need to save both FP and LR.

assert((maskSaveRegsInt & RBM_FP) != 0);
assert((maskSaveRegsInt & RBM_LR) != 0);
// We need to save either both FP and LR or neither of them. The latter
// happens only for OSR functions that inherit FP/LR from the tier0
// frame.
assert(((maskSaveRegsInt & RBM_FP) != 0) == ((maskSaveRegsInt & RBM_LR) != 0));

// If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
// (FP and LR) are protected from buffer overrun by the GS cookie. If FP/LR are at the lowest addresses,
Expand Down Expand Up @@ -4532,12 +4535,21 @@ void CodeGen::genPushCalleeSavedRegisters()

assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES);

GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
INS_OPTS_PRE_INDEX);
m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
if (saveFplr)
{
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
INS_OPTS_PRE_INDEX);
m_compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);

maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
}
else
{
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
m_compiler->unwindAllocStack(totalFrameSize);
offset = m_compiler->compLclFrameSize;
}
}
else if ((totalFrameSize <= 512) && !m_compiler->opts.compDbgEnC)
{
Expand Down Expand Up @@ -4583,14 +4595,21 @@ void CodeGen::genPushCalleeSavedRegisters()
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
m_compiler->unwindAllocStack(totalFrameSize);

assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
if (saveFplr)
{
assert(m_compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);

GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
m_compiler->lvaOutgoingArgSpaceSize);
m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize);
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
m_compiler->lvaOutgoingArgSpaceSize);
m_compiler->unwindSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize);

maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
offset = (int)m_compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
}
else
{
offset = (int)m_compiler->compLclFrameSize;
}
}
}
else
Expand Down Expand Up @@ -4679,10 +4698,13 @@ void CodeGen::genPushCalleeSavedRegisters()

frameType = 3;

calleeSaveSpDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
if (saveFplr)
{
calleeSaveSpDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.

// We'll take care of these later, but callee-saved regs code shouldn't see them.
maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
// We'll take care of these later, but callee-saved regs code shouldn't see them.
maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
}
}

assert(calleeSaveSpDeltaUnaligned >= 0);
Expand Down Expand Up @@ -4777,7 +4799,16 @@ void CodeGen::genPushCalleeSavedRegisters()

JITDUMP(" spAdjustment2=%d\n", spAdjustment2);

genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
if (saveFplr)
{
genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg,
pInitRegZeroed);
}
else
{
genStackPointerAdjustment(-spAdjustment2, initReg, pInitRegZeroed, /* reportUnwindData */ true);
}

offset += spAdjustment2;

// Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub"
Expand All @@ -4802,8 +4833,16 @@ void CodeGen::genPushCalleeSavedRegisters()
}
else
{
genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false, initReg,
pInitRegZeroed);
if (saveFplr)
{
genPrologSaveRegPair(REG_FP, REG_LR, m_compiler->lvaOutgoingArgSpaceSize, -remainingFrameSz, false,
initReg, pInitRegZeroed);
}
else
{
genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed, /* reportUnwindData */ true);
}

offset += remainingFrameSz;

offsetSpToSavedFp = m_compiler->lvaOutgoingArgSpaceSize;
Expand Down
69 changes: 42 additions & 27 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4967,6 +4967,15 @@ void CodeGen::genFinalizeFrame()

#endif // TARGET_LOONGARCH64 || TARGET_RISCV64

#if defined(TARGET_ARM64)
// We inherit registers saved by tier0. Avoid saving those.
if (m_compiler->opts.IsOSR())
{
PatchpointInfo* ppi = m_compiler->info.compPatchpointInfo;
maskCalleeRegsPushed &= ~ppi->CalleeSaveRegisters();
}
#endif

m_compiler->compCalleeRegsPushed = genCountBits(maskCalleeRegsPushed);

#ifdef DEBUG
Expand Down Expand Up @@ -5053,11 +5062,11 @@ void CodeGen::genFnProlog()

genBeginFnProlog();

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// For arm64 OSR, emit a "phantom prolog" to account for the actions taken
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// For some targets, emit a "phantom prolog" to account for the actions taken
// in the tier0 frame that impact FP and SP on entry to the OSR method.
//
// x64 handles this differently; the phantom prolog unwind is emitted in
// x64/arm64 handle this differently; the phantom prolog unwind is emitted in
// genOSRRecordTier0CalleeSavedRegistersAndFrame.
Comment thread
jakobbotsch marked this conversation as resolved.
Outdated
//
if (m_compiler->opts.IsOSR())
Expand Down Expand Up @@ -5320,10 +5329,10 @@ void CodeGen::genFnProlog()

const bool isRoot = (m_compiler->funCurrentFunc()->funKind == FuncKind::FUNC_ROOT);

#ifdef TARGET_AMD64
const bool isOSRx64Root = isRoot && m_compiler->opts.IsOSR();
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
const bool inheritsCalleeSaves = isRoot && m_compiler->opts.IsOSR();
#else
const bool isOSRx64Root = false;
const bool inheritsCalleeSaves = false;
#endif // TARGET_AMD64 || TARGET_ARM64

regMaskTP tempMask = initRegs & RBM_ALLINT & ~excludeMask & ~regSet.rsMaskResvd;
Expand Down Expand Up @@ -5352,15 +5361,15 @@ void CodeGen::genFnProlog()
// For x64 OSR root frames, we can't use any not-yet-saved callee-saved
// register as initReg, because we defer saving these until later in
// the prolog, and we don't have normal arg regs.
if (isOSRx64Root)
if (inheritsCalleeSaves)
{
initReg = REG_SCRATCH; // REG_EAX
}
#elif defined(TARGET_ARM64)
// For arm64 OSR root frames, we may need a scratch register for large
// offset addresses. Use a register that won't be allocated.
//
if (isRoot && m_compiler->opts.IsOSR())
if (inheritsCalleeSaves)
{
initReg = REG_IP1;
}
Expand Down Expand Up @@ -5419,23 +5428,24 @@ void CodeGen::genFnProlog()

unsigned extraFrameSize = 0;

#ifdef TARGET_XARCH

#ifdef TARGET_AMD64
if (isOSRx64Root)
#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
if (inheritsCalleeSaves)
Comment thread
jakobbotsch marked this conversation as resolved.
{
// Account for the Tier0 callee saves
//
genOSRRecordTier0CalleeSavedRegistersAndFrame();

#ifdef TARGET_AMD64
// We don't actually push any callee saves on the OSR frame,
// but we still reserve space, so account for this when
// allocating the local frame.
//
extraFrameSize = m_compiler->compCalleeRegsPushed * REGSIZE_BYTES;
#endif
}
#endif // TARGET_AMD64
#endif

#ifdef TARGET_XARCH
if (doubleAlignOrFramePointerUsed())
{
// OSR methods handle "saving" FP specially.
Expand All @@ -5444,7 +5454,7 @@ void CodeGen::genFnProlog()
// Tier0 method. The save we do here is just to set up a
// proper RBP-based frame chain link.
//
if (isOSRx64Root && isFramePointerUsed())
if (inheritsCalleeSaves && isFramePointerUsed())
{
GetEmitter()->emitIns_R_AR(INS_mov, EA_8BYTE, initReg, REG_FPBASE, 0);
inst_RV(INS_push, initReg, TYP_REF);
Expand All @@ -5460,9 +5470,9 @@ void CodeGen::genFnProlog()
inst_RV(INS_push, REG_FPBASE, TYP_REF);
m_compiler->unwindPush(REG_FPBASE);
}
#ifndef TARGET_AMD64 // On AMD64, establish the frame pointer after the "sub rsp"
#ifdef TARGET_X86 // On AMD64, establish the frame pointer after the "sub rsp"
genEstablishFramePointer(0, /*reportUnwindData*/ true);
#endif // !TARGET_AMD64
#endif // TARGET_X86

#if DOUBLE_ALIGN
if (m_compiler->genDoubleAlign())
Expand All @@ -5476,16 +5486,21 @@ void CodeGen::genFnProlog()
}
#endif // TARGET_XARCH

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
genPushCalleeSavedRegisters(initReg, &initRegZeroed);

#else // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64
bool pushesCalleeSaves = true;
#ifdef TARGET_AMD64
// For OSR x64 we need canonical epilogs (sequence of pops). Hence we do
// not push any registers in the prolog; instead we store them in the area
// allocated by the tier0 method.
// For OSR on other platforms we have no such requirement. Instead, we
// restore the tier0-saved callee saves from the tier0 area, and then push
// the additional callee saves in the OSR method prolog as normal.
pushesCalleeSaves = !inheritsCalleeSaves;
#endif

if (!isOSRx64Root)
if (pushesCalleeSaves)
{
genPushCalleeSavedRegisters();
genPushCalleeSavedRegisters(initReg, &initRegZeroed);
}
#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64

#ifdef TARGET_ARM
bool needToEstablishFP = false;
Expand Down Expand Up @@ -5530,14 +5545,14 @@ void CodeGen::genFnProlog()
}
#endif // !TARGET_ARM64 && !TARGET_LOONGARCH64 && !TARGET_RISCV64

#ifdef TARGET_AMD64
// For x64 OSR we have to finish saving int callee saves.
// For x64 OSR we have to finish saving callee saves.
//
if (isOSRx64Root)
#ifdef TARGET_AMD64
if (inheritsCalleeSaves)
{
genOSRSaveRemainingCalleeSavedRegisters();
}
#endif // TARGET_AMD64
#endif

//-------------------------------------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void CodeGen::genBeginFnProlog()
//------------------------------------------------------------------------
// genPushCalleeSavedRegisters: no-op since we don't need to save anything.
//
void CodeGen::genPushCalleeSavedRegisters()
void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
{
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10011,7 +10011,7 @@ void CodeGen::genOSRSaveRemainingCalleeSavedRegisters()
//------------------------------------------------------------------------
// genPushCalleeSavedRegisters: Push any callee-saved registers we have used.
//
void CodeGen::genPushCalleeSavedRegisters()
void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroed)
{
assert(m_compiler->compGeneratingProlog);

Comment thread
jakobbotsch marked this conversation as resolved.
Expand Down
Loading
Loading