Skip to content

JIT: Support bitwise field insertions for call arguments #115977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions src/coreclr/jit/abi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,29 @@ var_types ABIPassingSegment::GetRegisterType() const
}
}

//-----------------------------------------------------------------------------
// GetRegisterType:
// Return the smallest type larger than or equal to Size that most naturally
// represents the register this segment is passed in, taking into account the
// GC info of the specified layout.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: the formatting is a bit off (2 spaces vs. 3 spaces of indentation for the "Return Value" section); the "Arguments" section is also missing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me address that in a follow-up

//
// Arguments:
//   layout - The layout whose GC info is consulted for this segment.
//
// Return Value:
//   A type that matches ABIPassingSegment::Size and the register.
//
var_types ABIPassingSegment::GetRegisterType(ClassLayout* layout) const
{
    // Only segments passed in integer registers are looked up in the layout;
    // everything else uses the layout-agnostic register type.
    if (!genIsValidIntReg(GetRegister()))
    {
        return GetRegisterType();
    }

    assert(Offset < layout->GetSize());

    // A segment that covers exactly one pointer-aligned, pointer-sized slot
    // takes the type the layout reports for that slot.
    const bool startsOnSlotBoundary = (Offset % TARGET_POINTER_SIZE) == 0;
    const bool coversWholeSlot      = Size == TARGET_POINTER_SIZE;
    if (startsOnSlotBoundary && coversWholeSlot)
    {
        return layout->GetGCPtrType(Offset / TARGET_POINTER_SIZE);
    }

    return GetRegisterType();
}

//-----------------------------------------------------------------------------
// InRegister:
// Create an ABIPassingSegment representing that a segment is passed in a
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ABIPassingSegment
unsigned GetStackSize() const;

var_types GetRegisterType() const;
var_types GetRegisterType(ClassLayout* layout) const;

static ABIPassingSegment InRegister(regNumber reg, unsigned offset, unsigned size);
static ABIPassingSegment OnStack(unsigned stackOffset, unsigned offset, unsigned size);
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -11633,6 +11633,7 @@ class Compiler
#endif // defined(UNIX_AMD64_ABI)

bool fgTryMorphStructArg(CallArg* arg);
bool FieldsMatchAbi(LclVarDsc* varDsc, const ABIPassingInformation& abiInfo);

bool killGCRefs(GenTree* tree);

Expand Down
16 changes: 8 additions & 8 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2725,16 +2725,10 @@ struct GenTreeFieldList : public GenTree

class UseList
{
Use* m_head;
Use* m_tail;
Use* m_head = nullptr;
Use* m_tail = nullptr;

public:
UseList()
: m_head(nullptr)
, m_tail(nullptr)
{
}

Use* GetHead() const
{
return m_head;
Expand Down Expand Up @@ -2792,6 +2786,12 @@ struct GenTreeFieldList : public GenTree
}
}

// Reset the list to empty by dropping both end pointers. The uses that
// were linked into the list are not modified by this call.
void Clear()
{
    m_tail = nullptr;
    m_head = nullptr;
}

bool IsSorted() const
{
unsigned offset = 0;
Expand Down
180 changes: 139 additions & 41 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1725,20 +1725,12 @@ void Lowering::LowerArg(GenTreeCall* call, CallArg* callArg)
{
if (abiInfo.HasAnyRegisterSegment())
{
#if FEATURE_MULTIREG_ARGS
if ((abiInfo.NumSegments > 1) && arg->OperIs(GT_FIELD_LIST))
if (arg->OperIs(GT_FIELD_LIST))
{
unsigned int regIndex = 0;
for (GenTreeFieldList::Use& use : arg->AsFieldList()->Uses())
{
const ABIPassingSegment& segment = abiInfo.Segment(regIndex);
InsertPutArgReg(&use.NodeRef(), segment);

regIndex++;
}
LowerArgFieldList(callArg, arg->AsFieldList());
arg = *ppArg;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is enabled everywhere now since even single-reg args can be structs with multiple fields.

}
else
#endif // FEATURE_MULTIREG_ARGS
{
assert(abiInfo.HasExactlyOneRegisterSegment());
InsertPutArgReg(ppArg, abiInfo.Segment(0));
Expand Down Expand Up @@ -4809,6 +4801,18 @@ void Lowering::LowerRet(GenTreeOp* ret)
ContainCheckRet(ret);
}

// Describes a single register that a FIELD_LIST is being lowered into. This is
// the value produced by the "get register info" callbacks handed to
// IsFieldListCompatibleWithRegisters and LowerFieldListToFieldListOfRegisters.
struct LowerFieldListRegisterInfo
{
    // Offset at which the data carried by the register starts.
    unsigned Offset;
    // Type of the register.
    var_types RegType;

    LowerFieldListRegisterInfo(unsigned registerOffset, var_types registerType)
        : Offset(registerOffset)
        , RegType(registerType)
    {
    }
};

//----------------------------------------------------------------------------------------------
// LowerRetFieldList:
// Lower a returned FIELD_LIST node.
Expand All @@ -4822,21 +4826,18 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
unsigned numRegs = retDesc.GetReturnRegCount();

bool isCompatible = IsFieldListCompatibleWithReturn(fieldList);
auto getRegInfo = [=, &retDesc](unsigned regIndex) {
unsigned offset = retDesc.GetReturnFieldOffset(regIndex);
var_types regType = genActualType(retDesc.GetReturnRegType(regIndex));
return LowerFieldListRegisterInfo(offset, regType);
};

bool isCompatible = IsFieldListCompatibleWithRegisters(fieldList, numRegs, getRegInfo);
if (!isCompatible)
{
JITDUMP("Spilling field list [%06u] to stack\n", Compiler::dspTreeID(fieldList));
unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Spilled local for return value"));
unsigned lclNum =
StoreFieldListToNewLocal(comp->typGetObjLayout(comp->info.compMethodInfo->args.retTypeClass), fieldList);
LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
comp->lvaSetStruct(lclNum, comp->info.compMethodInfo->args.retTypeClass, false);
comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet));

for (GenTreeFieldList::Use& use : fieldList->Uses())
{
GenTree* store = comp->gtNewStoreLclFldNode(lclNum, use.GetType(), use.GetOffset(), use.GetNode());
BlockRange().InsertAfter(use.GetNode(), store);
LowerNode(store);
}

GenTree* retValue = comp->gtNewLclvNode(lclNum, varDsc->TypeGet());
ret->SetReturnValue(retValue);
Expand All @@ -4859,7 +4860,89 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
return;
}

LowerFieldListToFieldListOfRegisters(fieldList);
LowerFieldListToFieldListOfRegisters(fieldList, numRegs, getRegInfo);
}

//----------------------------------------------------------------------------------------------
// StoreFieldListToNewLocal:
//   Create a new local with the specified layout and store the specified
//   fields of the specified FIELD_LIST into it.
//
// Arguments:
//   layout    - Layout of the new local
//   fieldList - Fields to store to it
//
// Returns:
//   Var number of new local.
//
// Remarks:
//   The new local is marked as do-not-enregister. One STORE_LCL_FLD is
//   created per field, inserted right after the node that produces the
//   field's value, and lowered immediately. This function only reads the
//   FIELD_LIST's uses; updating or clearing them is left to the caller.
//
unsigned Lowering::StoreFieldListToNewLocal(ClassLayout* layout, GenTreeFieldList* fieldList)
{
    JITDUMP("Spilling field list [%06u] to stack\n", Compiler::dspTreeID(fieldList));
    unsigned lclNum = comp->lvaGrabTemp(true DEBUGARG("Spilled local for field list"));
    comp->lvaSetStruct(lclNum, layout, false);
    comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::LocalField));

    for (GenTreeFieldList::Use& use : fieldList->Uses())
    {
        // Store each field at its own offset within the new local.
        GenTree* store = comp->gtNewStoreLclFldNode(lclNum, use.GetType(), use.GetOffset(), use.GetNode());
        BlockRange().InsertAfter(use.GetNode(), store);
        LowerNode(store);
    }

    return lclNum;
}

//----------------------------------------------------------------------------------------------
// LowerArgFieldList:
//   Lower a FIELD_LIST node used as a call argument. The argument must not
//   have any stack segments.
//
// Arguments:
//   arg       - The argument
//   fieldList - The FIELD_LIST node
//
// Remarks:
//   When the fields are compatible with the argument's ABI segments the
//   FIELD_LIST is rewritten in place into a FIELD_LIST of registers.
//   Otherwise the fields are stored to a new local and the FIELD_LIST is
//   rebuilt with one LCL_FLD of that local per segment. In both cases
//   InsertPutArgReg is then applied to every field, the argument node is set
//   to the result of SoleFieldOrThis(), and the FIELD_LIST is removed from
//   the block range if it is no longer the argument node.
//
void Lowering::LowerArgFieldList(CallArg* arg, GenTreeFieldList* fieldList)
{
    auto& abiInfo = arg->AbiInfo;
    assert(!abiInfo.HasAnyStackSegment());

    // Describes the register of the ABI segment with the given index.
    auto segmentRegInfo = [arg](unsigned segIndex) {
        const ABIPassingSegment& segment = arg->AbiInfo.Segment(segIndex);
        return LowerFieldListRegisterInfo(segment.Offset, segment.GetRegisterType());
    };

    if (IsFieldListCompatibleWithRegisters(fieldList, abiInfo.NumSegments, segmentRegInfo))
    {
        LowerFieldListToFieldListOfRegisters(fieldList, abiInfo.NumSegments, segmentRegInfo);
    }
    else
    {
        // Spill the fields to a fresh local and reload it segment by segment.
        ClassLayout* argLayout   = comp->typGetObjLayout(arg->GetSignatureClassHandle());
        unsigned     spillLclNum = StoreFieldListToNewLocal(argLayout, fieldList);
        fieldList->Uses().Clear();

        for (const ABIPassingSegment& segment : abiInfo.Segments())
        {
            GenTreeLclFld* segmentLoad =
                comp->gtNewLclFldNode(spillLclNum, segment.GetRegisterType(argLayout), segment.Offset);
            fieldList->AddFieldLIR(comp, segmentLoad, segment.Offset, segmentLoad->TypeGet());
            BlockRange().InsertBefore(fieldList, segmentLoad);
        }
    }

    // At this point there must be exactly one field per segment.
    GenTreeFieldList::Use* curField = fieldList->Uses().GetHead();
    for (const ABIPassingSegment& segment : abiInfo.Segments())
    {
        assert((curField != nullptr) && "Ran out of fields while inserting PUTARG_REG");
        InsertPutArgReg(&curField->NodeRef(), segment);
        curField = curField->GetNext();
    }

    assert((curField == nullptr) && "Missed fields while inserting PUTARG_REG");

    arg->NodeRef() = fieldList->SoleFieldOrThis();
    if (arg->GetNode() != fieldList)
    {
        // The FIELD_LIST is no longer the argument node, so drop it from the LIR.
        BlockRange().Remove(fieldList);
    }
}

//----------------------------------------------------------------------------------------------
Expand All @@ -4874,21 +4957,29 @@ void Lowering::LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList)
// True if the fields of the FIELD_LIST are all direct insertions into the
// specified registers.
//
bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList)
template <typename GetRegisterInfoFunc>
bool Lowering::IsFieldListCompatibleWithRegisters(GenTreeFieldList* fieldList,
unsigned numRegs,
GetRegisterInfoFunc getRegInfo)
{
JITDUMP("Checking if field list [%06u] is compatible with return ABI: ", Compiler::dspTreeID(fieldList));
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
unsigned numRetRegs = retDesc.GetReturnRegCount();
JITDUMP("Checking if field list [%06u] is compatible with registers: ", Compiler::dspTreeID(fieldList));

GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
for (unsigned i = 0; i < numRetRegs; i++)
for (unsigned i = 0; i < numRegs; i++)
{
unsigned regStart = retDesc.GetReturnFieldOffset(i);
var_types regType = retDesc.GetReturnRegType(i);
unsigned regEnd = regStart + genTypeSize(regType);
LowerFieldListRegisterInfo regInfo = getRegInfo(i);
unsigned regStart = regInfo.Offset;
var_types regType = regInfo.RegType;
unsigned regEnd = regStart + genTypeSize(regType);

if ((i == numRegs - 1) && !varTypeUsesFloatReg(regType))
{
// Allow tail end to pass undefined bits into the register
regEnd = regStart + REGSIZE_BYTES;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm just curious, do we have a guarantee that we won't load the undefined bits as a payload somehow?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, upper bits of registers containing struct values are undefined in the managed ABI. So this is ok. (This is just ensuring the handling matches what we had before; without this there were a bunch of regressions).

I think the actual source that creates these loads is morph:

#ifdef TARGET_ARM64
if ((offset > 0) && argNode->OperIsLocalRead())
{
// For arm64 it's beneficial to consider all tails to
// be TYP_I_IMPL to allow more ldp's.
type = TYP_I_IMPL;
}
#endif

I'm not sure it's worth it -- this means we can sometimes generate two 8-byte loads (=> ldp) instead of one 8-byte load plus one smaller-than-8-byte load, so the code is slightly smaller. But then maybe that may result in a stall? It is probably worth seeing if we can come up with an example, and then maybe removing it if we can.

}

// TODO-CQ: Could just create a 0 for this.
if (use == nullptr)
if ((use == nullptr) || (use->GetOffset() >= regEnd))
{
JITDUMP("it is not; register %u has no corresponding field\n", i);
return false;
Expand Down Expand Up @@ -4949,19 +5040,26 @@ bool Lowering::IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList)
// Arguments:
// fieldList - The field list
//
void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList)
template <typename GetRegisterInfoFunc>
void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList,
unsigned numRegs,
GetRegisterInfoFunc getRegInfo)
{
const ReturnTypeDesc& retDesc = comp->compRetTypeDesc;
unsigned numRegs = retDesc.GetReturnRegCount();

GenTreeFieldList::Use* use = fieldList->Uses().GetHead();
assert(fieldList->Uses().IsSorted());

for (unsigned i = 0; i < numRegs; i++)
{
unsigned regStart = retDesc.GetReturnFieldOffset(i);
var_types regType = genActualType(retDesc.GetReturnRegType(i));
unsigned regEnd = regStart + genTypeSize(regType);
LowerFieldListRegisterInfo regInfo = getRegInfo(i);
unsigned regStart = regInfo.Offset;
var_types regType = regInfo.RegType;
unsigned regEnd = regStart + genTypeSize(regType);

if ((i == numRegs - 1) && !varTypeUsesFloatReg(regType))
{
// Allow tail end to pass undefined bits into the register
regEnd = regStart + REGSIZE_BYTES;
}

GenTreeFieldList::Use* regEntry = use;

Expand Down Expand Up @@ -5001,7 +5099,7 @@ void Lowering::LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList)
}

// If this is a float -> int insertion, then we need the bitcast now.
if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regType))
if (varTypeUsesFloatReg(value) && varTypeUsesIntReg(regInfo.RegType))
{
assert((genTypeSize(value) == 4) || (genTypeSize(value) == 8));
var_types castType = genTypeSize(value) == 4 ? TYP_INT : TYP_LONG;
Expand Down
12 changes: 8 additions & 4 deletions src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,14 @@ class Lowering final : public Phase
GenTree* LowerAsyncContinuation(GenTree* asyncCont);
void LowerReturnSuspend(GenTree* retSuspend);
void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList);
bool IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList);
void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList);
void LowerCallStruct(GenTreeCall* call);
void LowerStoreSingleRegCallStruct(GenTreeBlk* store);
unsigned StoreFieldListToNewLocal(ClassLayout* layout, GenTreeFieldList* fieldList);
void LowerArgFieldList(CallArg* arg, GenTreeFieldList* fieldList);
template <typename GetRegisterInfoFunc>
bool IsFieldListCompatibleWithRegisters(GenTreeFieldList* fieldList, unsigned numRegs, GetRegisterInfoFunc func);
template <typename GetRegisterInfoFunc>
void LowerFieldListToFieldListOfRegisters(GenTreeFieldList* fieldList, unsigned numRegs, GetRegisterInfoFunc func);
void LowerCallStruct(GenTreeCall* call);
void LowerStoreSingleRegCallStruct(GenTreeBlk* store);
#if !defined(WINDOWS_AMD64_ABI)
GenTreeLclVar* SpillStructCallResult(GenTreeCall* call) const;
#endif // WINDOWS_AMD64_ABI
Expand Down
Loading
Loading