Skip to content

OSR support for Arm64 #62831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions eng/pipelines/coreclr/jit-experimental.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ jobs:
jobTemplate: /eng/pipelines/common/build-coreclr-and-libraries-job.yml
buildConfig: checked
platforms:
- OSX_arm64
- OSX_x64
- Linux_arm64
- Linux_x64
- windows_arm64
- windows_x64
- CoreClrTestBuildHost # Either OSX_x64 or Linux_x64
jobParameters:
Expand All @@ -35,7 +39,11 @@ jobs:
jobTemplate: /eng/pipelines/common/templates/runtimes/run-test-job.yml
buildConfig: checked
platforms:
- OSX_arm64
- OSX_x64
- Linux_arm64
- Linux_x64
- windows_arm64
- windows_x64
helixQueueGroup: ci
helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/clrdefinitions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -179,9 +179,9 @@ endif(FEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION)
add_definitions(-DFEATURE_SVR_GC)
add_definitions(-DFEATURE_SYMDIFF)
add_compile_definitions(FEATURE_TIERED_COMPILATION)
if (CLR_CMAKE_TARGET_ARCH_AMD64)
if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
add_compile_definitions(FEATURE_ON_STACK_REPLACEMENT)
endif (CLR_CMAKE_TARGET_ARCH_AMD64)
endif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64)
add_compile_definitions(FEATURE_PGO)
if (CLR_CMAKE_TARGET_WIN32)
add_definitions(-DFEATURE_TYPEEQUIVALENCE)
Expand Down
31 changes: 23 additions & 8 deletions src/coreclr/inc/patchpointinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
// --------------------------------------------------------------------------------
// Describes information needed to make an OSR transition
// - location of IL-visible locals and other important state on the
// original (Tier0) method frame
// - total size of the original frame, and SP-FP delta
// original (Tier0) method frame, with respect to top of frame
// (hence these offsets will be negative as stack grows down)
// - total size of the original frame
//
// Currently the patchpoint info is independent of the IL offset of the patchpoint.
//
Expand All @@ -33,26 +34,40 @@ struct PatchpointInfo
}

// Initialize
void Initialize(unsigned localCount, int fpToSpDelta)
void Initialize(unsigned localCount, int totalFrameSize)
{
m_fpToSpDelta = fpToSpDelta;
m_totalFrameSize = totalFrameSize;
m_numberOfLocals = localCount;
m_genericContextArgOffset = -1;
m_keptAliveThisOffset = -1;
m_securityCookieOffset = -1;
m_monitorAcquiredOffset = -1;
}

// Copy
void Copy(const PatchpointInfo* original)
{
m_genericContextArgOffset = original->m_genericContextArgOffset;
m_keptAliveThisOffset = original->m_keptAliveThisOffset;
m_securityCookieOffset = original->m_securityCookieOffset;
m_monitorAcquiredOffset = original->m_monitorAcquiredOffset;

for (unsigned i = 0; i < original->m_numberOfLocals; i++)
{
m_offsetAndExposureData[i] = original->m_offsetAndExposureData[i];
}
}

// Total size of this patchpoint info record, in bytes.
// Obtained by passing this record's local count to ComputeSize.
unsigned PatchpointInfoSize() const
{
    const unsigned recordSizeInBytes = ComputeSize(m_numberOfLocals);
    return recordSizeInBytes;
}

// FP to SP delta of the original method
int FpToSpDelta() const
// Total frame size of the original method
int TotalFrameSize() const
{
return m_fpToSpDelta;
return m_totalFrameSize;
}

// Number of locals in the original method (including special locals)
Expand Down Expand Up @@ -154,7 +169,7 @@ struct PatchpointInfo
};

unsigned m_numberOfLocals;
int m_fpToSpDelta;
int m_totalFrameSize;
int m_genericContextArgOffset;
int m_keptAliveThisOffset;
int m_securityCookieOffset;
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ class CodeGen final : public CodeGenInterface
void genEstablishFramePointer(int delta, bool reportUnwindData);
void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState);
void genEnregisterIncomingStackArgs();
#if defined(TARGET_ARM64)
void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed);
#else
void genEnregisterOSRArgsAndLocals();
#endif
void genCheckUseBlockInit();
#if defined(UNIX_AMD64_ABI) && defined(FEATURE_SIMD)
void genClearStackVec3ArgUpperBits();
Expand Down
148 changes: 112 additions & 36 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lower.h"
#include "gcinfo.h"
#include "gcinfoencoder.h"
#include "patchpointinfo.h"

/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Expand Down Expand Up @@ -1113,9 +1114,20 @@ void CodeGen::genFuncletProlog(BasicBlock* block)

if (genFuncletInfo.fiFrameType == 1)
{
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// With OSR we may see large values for fiSpDelta1
// (we really need to probe the frame, sigh)
if (compiler->opts.IsOSR())
{
genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0);
compiler->unwindSaveRegPair(REG_FP, REG_LR, 0);
}
else
{
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
}

maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now

Expand All @@ -1141,9 +1153,20 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
}
else if (genFuncletInfo.fiFrameType == 3)
{
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// With OSR we may see large values for fiSpDelta1
// (we really need to probe the frame, sigh)
if (compiler->opts.IsOSR())
{
genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0);
compiler->unwindSaveRegPair(REG_FP, REG_LR, 0);
}
else
{
GetEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
}

maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
}
Expand Down Expand Up @@ -1171,17 +1194,25 @@ void CodeGen::genFuncletProlog(BasicBlock* block)

if ((genFuncletInfo.fiFrameType == 3) || (genFuncletInfo.fiFrameType == 5))
{
// Note that genFuncletInfo.fiSpDelta2 is always a negative value
assert(genFuncletInfo.fiSpDelta2 < 0);
// Note that genFuncletInfo.fiSpDelta2 is always a non-positive value
assert(genFuncletInfo.fiSpDelta2 <= 0);

// generate sub SP,SP,imm
genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr, /* reportUnwindData */ true);
if (genFuncletInfo.fiSpDelta2 < 0)
{
genStackPointerAdjustment(genFuncletInfo.fiSpDelta2, REG_R2, nullptr, /* reportUnwindData */ true);
}
else
{
// we will only see fiSpDelta2 == 0 for osr funclets
assert(compiler->opts.IsOSR());
}
}

// This is the end of the OS-reported prolog for purposes of unwinding
compiler->unwindEndProlog();

// If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet frame.
// If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the funclet frame.
if (compiler->lvaPSPSym != BAD_VAR_NUM)
{
if (isFilter)
Expand Down Expand Up @@ -1252,11 +1283,19 @@ void CodeGen::genFuncletEpilog()

if ((genFuncletInfo.fiFrameType == 3) || (genFuncletInfo.fiFrameType == 5))
{
// Note that genFuncletInfo.fiSpDelta2 is always a negative value
assert(genFuncletInfo.fiSpDelta2 < 0);
// Note that genFuncletInfo.fiSpDelta2 is always a non-positive value
assert(genFuncletInfo.fiSpDelta2 <= 0);

// generate add SP,SP,imm
genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr, /* reportUnwindData */ true);
if (genFuncletInfo.fiSpDelta2 < 0)
{
genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr, /* reportUnwindData */ true);
}
else
{
// we should only see zero SpDelta2 with osr.
assert(compiler->opts.IsOSR());
}
}

regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
Expand All @@ -1269,9 +1308,21 @@ void CodeGen::genFuncletEpilog()

if (genFuncletInfo.fiFrameType == 1)
{
GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
INS_OPTS_POST_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// With OSR we may see large values for fiSpDelta1
//
if (compiler->opts.IsOSR())
{
GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0);
compiler->unwindSaveRegPair(REG_FP, REG_LR, 0);

genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
}
else
{
GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
INS_OPTS_POST_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
}

assert(genFuncletInfo.fiSpDelta2 == 0);
assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
Expand All @@ -1293,9 +1344,21 @@ void CodeGen::genFuncletEpilog()
}
else if (genFuncletInfo.fiFrameType == 3)
{
GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
INS_OPTS_POST_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
// With OSR we may see large values for fiSpDelta1
//
if (compiler->opts.IsOSR())
{
GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, 0);
compiler->unwindSaveRegPair(REG_FP, REG_LR, 0);

genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_SCRATCH, nullptr, /* reportUnwindData */ true);
}
else
{
GetEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
INS_OPTS_POST_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
}
}
else if (genFuncletInfo.fiFrameType == 4)
{
Expand Down Expand Up @@ -1346,14 +1409,23 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
// The frame size and offsets must be finalized
assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);

genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
unsigned const PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;

// Because a method and funclets must have the same caller-relative PSPSym offset,
// if there is a PSPSym, we have to pad the funclet frame size for OSR.
//
unsigned osrPad = 0;
if (compiler->opts.IsOSR() && (PSPSize > 0))
{
osrPad = compiler->info.compPatchpointInfo->TotalFrameSize();
}

genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad;

regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved;
assert((rsMaskSaveRegs & RBM_LR) != 0);
assert((rsMaskSaveRegs & RBM_FP) != 0);

unsigned PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0;

unsigned saveRegsCount = genCountBits(rsMaskSaveRegs);
unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize;
if (compiler->info.compIsVarArgs)
Expand All @@ -1362,23 +1434,24 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
// so that they are contiguous with the incoming stack arguments.
saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES;
}
unsigned saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN);

unsigned const saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN);

assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0);
unsigned outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);
unsigned const outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN);

unsigned maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned;
unsigned const maxFuncletFrameSizeAligned = saveRegsPlusPSPSizeAligned + osrPad + outgoingArgSpaceAligned;
assert((maxFuncletFrameSizeAligned % STACK_ALIGN) == 0);

int SP_to_FPLR_save_delta;
int SP_to_PSP_slot_delta;
int CallerSP_to_PSP_slot_delta;

unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
unsigned const funcletFrameSize = saveRegsPlusPSPSize + osrPad + compiler->lvaOutgoingArgSpaceSize;
unsigned const funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);

unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
unsigned const funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));

if (maxFuncletFrameSizeAligned <= 512)
Expand All @@ -1391,16 +1464,16 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES;
}

SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize;
SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad + osrPad;
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize);

genFuncletInfo.fiFrameType = 4;
}
else
{
SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);

if (compiler->lvaOutgoingArgSpaceSize == 0)
{
Expand Down Expand Up @@ -1432,21 +1505,21 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()

SP_to_PSP_slot_delta =
compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad + saveRegsPlusPSPAlignmentPad;
CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize;
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize);

genFuncletInfo.fiFrameType = 5;
}
else
{
SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
CallerSP_to_PSP_slot_delta =
-(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES -
saveRegsPlusPSPAlignmentPad);

genFuncletInfo.fiFrameType = 3;
}

genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
genFuncletInfo.fiSpDelta1 = -(int)(osrPad + saveRegsPlusPSPSizeAligned);
genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;

assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
Expand All @@ -1468,7 +1541,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
printf(" Save regs: ");
dspRegMask(genFuncletInfo.fiSaveRegs);
printf("\n");
printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta);
if (compiler->opts.IsOSR())
{
printf(" OSR Pad: %d\n", osrPad);
}
printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta);
printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta);
printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta);
Expand Down
Loading