Skip to content

Commit 20a6043

Browse files
kunalspathak authored and Ruihan-Yin committed
Handle more than 64 registers - Part 1 (dotnet#101950)
* Convert regMaskTP for ARM64 to struct with single field
* Fix genFirstRegNumFromMaskAndToggle() and genFirstRegNumFromMask()
* minor fix
* review feedback
* fix the TP regression from 1.5% -> 0.5%
* Pass by value
* jit format
* review feedback
* Remove FORCEINLINE
* Remove setLow()
1 parent b89941e commit 20a6043

File tree

11 files changed

+228
-32
lines changed

11 files changed

+228
-32
lines changed

src/coreclr/jit/codegencommon.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3829,9 +3829,9 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP&
38293829

38303830
// Iterate through float/double registers and initialize them to 0 or
38313831
// copy from already initialized register of the same type.
3832-
regMaskTP regMask = genRegMask(REG_FP_FIRST);
3833-
for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg), regMask <<= 1)
3832+
for (regNumber reg = REG_FP_FIRST; reg <= REG_FP_LAST; reg = REG_NEXT(reg))
38343833
{
3834+
regMaskTP regMask = genRegMask(reg);
38353835
if (regMask & initFltRegs)
38363836
{
38373837
// Do we have a float register already set to 0?
@@ -5732,10 +5732,9 @@ void CodeGen::genFnProlog()
57325732

57335733
if (initRegs)
57345734
{
5735-
regMaskTP regMask = 0x1;
5736-
5737-
for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg), regMask <<= 1)
5735+
for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
57385736
{
5737+
regMaskTP regMask = genRegMask(reg);
57395738
if (regMask & initRegs)
57405739
{
57415740
// Check if we have already zeroed this register

src/coreclr/jit/compiler.hpp

Lines changed: 61 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,33 @@ inline bool genExactlyOneBit(T value)
9999
return ((value != 0) && genMaxOneBit(value));
100100
}
101101

102+
#ifdef TARGET_ARM64
103+
inline regMaskTP genFindLowestBit(regMaskTP value)
104+
{
105+
return regMaskTP(genFindLowestBit(value.getLow()));
106+
}
107+
108+
/*****************************************************************************
109+
*
110+
* Return true if the given value has exactly zero or one bits set.
111+
*/
112+
113+
inline bool genMaxOneBit(regMaskTP value)
114+
{
115+
return genMaxOneBit(value.getLow());
116+
}
117+
118+
/*****************************************************************************
119+
*
120+
* Return true if the given value has exactly one bit set.
121+
*/
122+
123+
inline bool genExactlyOneBit(regMaskTP value)
124+
{
125+
return genExactlyOneBit(value.getLow());
126+
}
127+
#endif
128+
102129
/*****************************************************************************
103130
*
104131
* Given a value that has exactly one bit set, return the position of that
@@ -147,6 +174,13 @@ inline unsigned genCountBits(uint64_t bits)
147174
return BitOperations::PopCount(bits);
148175
}
149176

177+
#ifdef TARGET_ARM64
178+
inline unsigned genCountBits(regMaskTP mask)
179+
{
180+
return BitOperations::PopCount(mask.getLow());
181+
}
182+
#endif
183+
150184
/*****************************************************************************
151185
*
152186
* A rather simple routine that counts the number of bits in a given number.
@@ -914,11 +948,18 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
914948

915949
/* Convert the mask to a register number */
916950

917-
regNumber regNum = (regNumber)genLog2(mask);
951+
#ifdef TARGET_ARM64
952+
regNumber regNum = (regNumber)genLog2(mask.getLow());
918953

919954
/* Make sure we got it right */
955+
assert(genRegMask(regNum) == mask.getLow());
920956

957+
#else
958+
regNumber regNum = (regNumber)genLog2(mask);
959+
960+
/* Make sure we got it right */
921961
assert(genRegMask(regNum) == mask);
962+
#endif
922963

923964
return regNum;
924965
}
@@ -940,7 +981,8 @@ inline regNumber genFirstRegNumFromMaskAndToggle(regMaskTP& mask)
940981

941982
/* Convert the mask to a register number */
942983

943-
regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
984+
regNumber regNum = (regNumber)BitScanForward(mask);
985+
944986
mask ^= genRegMask(regNum);
945987

946988
return regNum;
@@ -962,7 +1004,7 @@ inline regNumber genFirstRegNumFromMask(regMaskTP mask)
9621004

9631005
/* Convert the mask to a register number */
9641006

965-
regNumber regNum = (regNumber)BitOperations::BitScanForward(mask);
1007+
regNumber regNum = (regNumber)BitScanForward(mask);
9661008

9671009
return regNum;
9681010
}
@@ -4463,30 +4505,46 @@ inline void* operator new[](size_t sz, Compiler* compiler, CompMemKind cmk)
44634505

44644506
inline void printRegMask(regMaskTP mask)
44654507
{
4508+
#ifdef TARGET_ARM64
4509+
printf(REG_MASK_ALL_FMT, mask.getLow());
4510+
#else
44664511
printf(REG_MASK_ALL_FMT, mask);
4512+
#endif
44674513
}
44684514

44694515
inline char* regMaskToString(regMaskTP mask, Compiler* context)
44704516
{
44714517
const size_t cchRegMask = 24;
44724518
char* regmask = new (context, CMK_Unknown) char[cchRegMask];
44734519

4520+
#ifdef TARGET_ARM64
4521+
sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask.getLow());
4522+
#else
44744523
sprintf_s(regmask, cchRegMask, REG_MASK_ALL_FMT, mask);
4524+
#endif
44754525

44764526
return regmask;
44774527
}
44784528

44794529
inline void printRegMaskInt(regMaskTP mask)
44804530
{
4531+
#ifdef TARGET_ARM64
4532+
printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT).getLow());
4533+
#else
44814534
printf(REG_MASK_INT_FMT, (mask & RBM_ALLINT));
4535+
#endif
44824536
}
44834537

44844538
inline char* regMaskIntToString(regMaskTP mask, Compiler* context)
44854539
{
44864540
const size_t cchRegMask = 24;
44874541
char* regmask = new (context, CMK_Unknown) char[cchRegMask];
44884542

4543+
#ifdef TARGET_ARM64
4544+
sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT).getLow());
4545+
#else
44894546
sprintf_s(regmask, cchRegMask, REG_MASK_INT_FMT, (mask & RBM_ALLINT));
4547+
#endif
44904548

44914549
return regmask;
44924550
}

src/coreclr/jit/emit.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3510,7 +3510,7 @@ void emitter::emitDispRegSet(regMaskTP regs)
35103510
continue;
35113511
}
35123512

3513-
regs -= curReg;
3513+
regs ^= curReg;
35143514

35153515
if (sp)
35163516
{
@@ -3870,8 +3870,8 @@ void emitter::emitDispGCRegDelta(const char* title, regMaskTP prevRegs, regMaskT
38703870
{
38713871
emitDispGCDeltaTitle(title);
38723872
regMaskTP sameRegs = prevRegs & curRegs;
3873-
regMaskTP removedRegs = prevRegs - sameRegs;
3874-
regMaskTP addedRegs = curRegs - sameRegs;
3873+
regMaskTP removedRegs = prevRegs ^ sameRegs;
3874+
regMaskTP addedRegs = curRegs ^ sameRegs;
38753875
if (removedRegs != RBM_NONE)
38763876
{
38773877
printf(" -");
@@ -8972,7 +8972,7 @@ void emitter::emitUpdateLiveGCregs(GCtype gcType, regMaskTP regs, BYTE* addr)
89728972
emitGCregDeadUpd(reg, addr);
89738973
}
89748974

8975-
chg -= bit;
8975+
chg ^= bit;
89768976
} while (chg);
89778977

89788978
assert(emitThisXXrefRegs == regs);

src/coreclr/jit/gcencode.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4667,7 +4667,7 @@ void GCInfo::gcInfoRecordGCRegStateChange(GcInfoEncoder* gcInfoEncoder,
46674667
}
46684668

46694669
// Turn the bit we've just generated off and continue.
4670-
regMask -= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
4670+
regMask ^= tmpMask; // EAX,ECX,EDX,EBX,---,EBP,ESI,EDI
46714671
}
46724672
}
46734673

src/coreclr/jit/lsra.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13614,7 +13614,7 @@ regMaskTP LinearScan::RegisterSelection::select(Interval* current
1361413614
&overallLimitCandidates);
1361513615
assert(limitConsecutiveResult != RBM_NONE);
1361613616

13617-
unsigned startRegister = BitOperations::BitScanForward(limitConsecutiveResult);
13617+
unsigned startRegister = BitScanForward(limitConsecutiveResult);
1361813618

1361913619
regMaskTP registersNeededMask = (1ULL << refPosition->regCount) - 1;
1362013620
candidates |= (registersNeededMask << startRegister);

src/coreclr/jit/lsra.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -794,8 +794,8 @@ class LinearScan : public LinearScanInterface
794794
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R3 | RBM_R4 | RBM_R5);
795795
static const regMaskTP LsraLimitSmallFPSet = (RBM_F0 | RBM_F1 | RBM_F2 | RBM_F16 | RBM_F17);
796796
#elif defined(TARGET_ARM64)
797-
static const regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
798-
static const regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
797+
static constexpr regMaskTP LsraLimitSmallIntSet = (RBM_R0 | RBM_R1 | RBM_R2 | RBM_R19 | RBM_R20);
798+
static constexpr regMaskTP LsraLimitSmallFPSet = (RBM_V0 | RBM_V1 | RBM_V2 | RBM_V8 | RBM_V9);
799799
#elif defined(TARGET_X86)
800800
static const regMaskTP LsraLimitSmallIntSet = (RBM_EAX | RBM_ECX | RBM_EDI);
801801
static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | RBM_XMM6 | RBM_XMM7);

src/coreclr/jit/lsraarm64.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
180180
unsigned int registersNeeded,
181181
regMaskTP* allConsecutiveCandidates)
182182
{
183-
if (BitOperations::PopCount(candidates) < registersNeeded)
183+
if (PopCount(candidates) < registersNeeded)
184184
{
185185
// There is no way the register demanded can be satisfied for this RefPosition
186186
// based on the candidates from which it can allocate a register.
@@ -205,7 +205,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
205205
do
206206
{
207207
// From LSB, find the first available register (bit `1`)
208-
regAvailableStartIndex = BitOperations::BitScanForward(static_cast<DWORD64>(currAvailableRegs));
208+
regAvailableStartIndex = BitScanForward(currAvailableRegs);
209209
regMaskTP startMask = (1ULL << regAvailableStartIndex) - 1;
210210

211211
// Mask all the bits that are processed from LSB thru regAvailableStart until the last `1`.
@@ -223,7 +223,7 @@ regMaskTP LinearScan::filterConsecutiveCandidates(regMaskTP candidates,
223223
}
224224
else
225225
{
226-
regAvailableEndIndex = BitOperations::BitScanForward(static_cast<DWORD64>(maskProcessed));
226+
regAvailableEndIndex = BitScanForward(maskProcessed);
227227
}
228228
regMaskTP endMask = (1ULL << regAvailableEndIndex) - 1;
229229

@@ -335,7 +335,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC
335335
do
336336
{
337337
// From LSB, find the first available register (bit `1`)
338-
regAvailableStartIndex = BitOperations::BitScanForward(static_cast<DWORD64>(unprocessedRegs));
338+
regAvailableStartIndex = BitScanForward(unprocessedRegs);
339339

340340
// For the current range, find how many registers are free vs. busy
341341
regMaskTP maskForCurRange = RBM_NONE;
@@ -370,7 +370,7 @@ regMaskTP LinearScan::filterConsecutiveCandidatesForSpill(regMaskTP consecutiveC
370370
// In the given range, there are some free registers available. Calculate how many registers
371371
// will need spilling if this range is picked.
372372

373-
int curSpillRegs = registersNeeded - BitOperations::PopCount(maskForCurRange);
373+
int curSpillRegs = registersNeeded - PopCount(maskForCurRange);
374374
if (curSpillRegs < maxSpillRegs)
375375
{
376376
consecutiveResultForBusy = 1ULL << regAvailableStartIndex;

src/coreclr/jit/lsrabuild.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2898,7 +2898,7 @@ void LinearScan::stressSetRandomParameterPreferences()
28982898

28992899
// Select a random register from all possible parameter registers
29002900
// (of the right type). Preference this parameter to that register.
2901-
unsigned numBits = BitOperations::PopCount(*regs);
2901+
unsigned numBits = PopCount(*regs);
29022902
if (numBits == 0)
29032903
{
29042904
continue;

src/coreclr/jit/regset.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -957,7 +957,7 @@ regMaskSmall genRegMaskFromCalleeSavedMask(unsigned short calleeSaveMask)
957957
regMaskSmall res = 0;
958958
for (int i = 0; i < CNT_CALL_GC_REGS; i++)
959959
{
960-
if ((calleeSaveMask & ((regMaskTP)1 << i)) != 0)
960+
if ((calleeSaveMask & (1 << i)) != 0)
961961
{
962962
res |= raRbmCalleeSaveOrder[i];
963963
}

0 commit comments

Comments
 (0)