
Commit 309b60d

JIT ARM64-SVE: Allow LCL_VARs to store as mask
1 parent 4018d58 commit 309b60d

File tree

12 files changed: +185 -45 lines changed


src/coreclr/jit/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
@@ -75,12 +75,16 @@ function(create_standalone_jit)
   if ((TARGETDETAILS_ARCH STREQUAL "x64") OR (TARGETDETAILS_ARCH STREQUAL "arm64") OR ((TARGETDETAILS_ARCH STREQUAL "x86") AND NOT (TARGETDETAILS_OS STREQUAL "unix")))
     target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_SIMD)
     target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_HW_INTRINSICS)
+    target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_SIMD)
+    target_compile_definitions(${TARGETDETAILS_TARGET} PRIVATE FEATURE_MASKED_HW_INTRINSICS)
   endif ()
 endfunction()
 
 if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR (CLR_CMAKE_TARGET_ARCH_I386 AND NOT CLR_CMAKE_HOST_UNIX))
   add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_SIMD>)
   add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_HW_INTRINSICS>)
+  add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_MASKED_SIMD>)
+  add_compile_definitions($<$<NOT:$<BOOL:$<TARGET_PROPERTY:IGNORE_DEFAULT_TARGET_ARCH>>>:FEATURE_MASKED_HW_INTRINSICS>)
 endif ()
 
 # JIT_BUILD disables certain PAL_TRY debugging features
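
Note: the new FEATURE_MASKED_SIMD / FEATURE_MASKED_HW_INTRINSICS defines are consumed as preprocessor guards in the source changes further down (hwintrinsic.cpp, importer.cpp). A minimal sketch, not taken from the repo, of how such a guard behaves; the file name and output strings are purely illustrative:

// feature_guard_sketch.cpp -- illustrative only; build with e.g.
//   g++ -DTARGET_ARM64 -DFEATURE_MASKED_SIMD feature_guard_sketch.cpp
#include <cstdio>

int main()
{
#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD)
    // Only reached when both defines were passed to the compiler,
    // which is what the new CMake lines arrange for JIT builds.
    std::printf("mask-aware SIMD paths compiled in\n");
#else
    std::printf("mask-aware SIMD paths compiled out\n");
#endif
    return 0;
}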

src/coreclr/jit/codegenarm64.cpp

Lines changed: 19 additions & 2 deletions
@@ -2771,7 +2771,16 @@ void CodeGen::genCodeForLclVar(GenTreeLclVar* tree)
         emitAttr attr = emitActualTypeSize(targetType);
 
         emitter* emit = GetEmitter();
-        emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0);
+
+        if (ins == INS_sve_ldr && !varTypeUsesMaskReg(targetType))
+        {
+            emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0, INS_SCALABLE_OPTS_UNPREDICATED);
+        }
+        else
+        {
+            emit->emitIns_R_S(ins, attr, tree->GetRegNum(), varNum, 0);
+        }
+
         genProduceReg(tree);
     }
 }
@@ -2956,7 +2965,15 @@ void CodeGen::genCodeForStoreLclVar(GenTreeLclVar* lclNode)
         instruction ins  = ins_StoreFromSrc(dataReg, targetType);
         emitAttr    attr = emitActualTypeSize(targetType);
 
-        emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
+        // TODO-SVE: Removable once REG_V0 and REG_P0 are distinct
+        if (ins == INS_sve_str && !varTypeUsesMaskReg(targetType))
+        {
+            emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0, INS_SCALABLE_OPTS_UNPREDICATED);
+        }
+        else
+        {
+            emit->emitIns_S_R(ins, attr, dataReg, varNum, /* offset */ 0);
+        }
     }
     else // store into register (i.e move into register)
     {
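
A standalone sketch of the decision the two hunks above add (the enum values and the typeUsesMaskReg flag are simplified stand-ins for the JIT's types, so names here are illustrative rather than the real API): an SVE ldr/str of a local is emitted in its unpredicated Z-register form unless the local's type actually lives in a predicate (mask) register.

// select_sve_form.cpp -- illustrative model only, not JIT code.
#include <cassert>
#include <cstdio>

enum Instruction  { INS_ldr, INS_str, INS_sve_ldr, INS_sve_str };
enum ScalableOpts { INS_SCALABLE_OPTS_NONE, INS_SCALABLE_OPTS_UNPREDICATED };

// Stand-in for varTypeUsesMaskReg(): true when the value is TYP_MASK-like.
static ScalableOpts selectLoadStoreForm(Instruction ins, bool typeUsesMaskReg)
{
    if ((ins == INS_sve_ldr || ins == INS_sve_str) && !typeUsesMaskReg)
    {
        return INS_SCALABLE_OPTS_UNPREDICATED; // scalable vector (Z register) form
    }
    return INS_SCALABLE_OPTS_NONE;             // predicate (P register) form, or a non-SVE ins
}

int main()
{
    assert(selectLoadStoreForm(INS_sve_str, /*typeUsesMaskReg*/ false) == INS_SCALABLE_OPTS_UNPREDICATED);
    assert(selectLoadStoreForm(INS_sve_str, /*typeUsesMaskReg*/ true) == INS_SCALABLE_OPTS_NONE);
    assert(selectLoadStoreForm(INS_str, /*typeUsesMaskReg*/ false) == INS_SCALABLE_OPTS_NONE);
    std::printf("sve form selection behaves as expected\n");
    return 0;
}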

src/coreclr/jit/emitarm64.cpp

Lines changed: 88 additions & 21 deletions
@@ -17311,13 +17311,14 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
  *
  *  Add an instruction referencing a register and a stack-based local variable.
  */
-void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
 {
     emitAttr  size       = EA_SIZE(attr);
     insFormat fmt        = IF_NONE;
     int       disp       = 0;
     unsigned  scale      = 0;
     bool      isLdrStr   = false;
+    bool      isScalable = false;
 
     assert(offs >= 0);
 
@@ -17353,6 +17354,31 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
             scale = 0;
             break;
 
+        case INS_sve_ldr:
+            assert(isVectorRegister(reg1) || isPredicateRegister(reg1));
+            isScalable = true;
+
+            // TODO-SVE: This should probably be set earlier in the caller
+            size = EA_SCALABLE;
+            attr = size;
+
+            // TODO-SVE: Use register number instead of enum
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                fmt = IF_SVE_IE_2A;
+                // TODO-SVE: Don't assume 128bit vectors
+                scale = NaturalScale_helper(EA_16BYTE);
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_ID_2A;
+                // TODO-SVE: Don't assume 128bit vectors
+                // Predicate size is vector length / 8
+                scale = NaturalScale_helper(EA_2BYTE);
+            }
+            break;
+
         default:
             NYI("emitIns_R_S"); // FP locals?
             return;
@@ -17363,6 +17389,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
     ssize_t imm;
     int     base;
     bool    FPbased;
+    insFormat scalarfmt = fmt;
 
     base = emitComp->lvaFrameAddress(varx, &FPbased);
     disp = base + offs;
@@ -17387,13 +17414,13 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
 
         if (imm <= 0x0fff)
         {
-            fmt = IF_DI_2A; // add reg1,reg2,#disp
+            scalarfmt = IF_DI_2A; // add reg1,reg2,#disp
         }
         else
         {
            regNumber rsvdReg = codeGen->rsGetRsvdReg();
            codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
-           fmt = IF_DR_3A; // add reg1,reg2,rsvdReg
+           scalarfmt = IF_DR_3A; // add reg1,reg2,rsvdReg
         }
     }
     else
@@ -17402,13 +17429,13 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
         imm = disp;
         if (imm == 0)
         {
-            fmt = IF_LS_2A;
+            scalarfmt = IF_LS_2A;
         }
         else if ((imm < 0) || ((imm & mask) != 0))
         {
            if ((imm >= -256) && (imm <= 255))
            {
-               fmt = IF_LS_2C;
+               scalarfmt = IF_LS_2C;
            }
            else
            {
@@ -17417,11 +17444,13 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
        }
        else if (imm > 0)
        {
+           // TODO: We should be able to scale values <0 for all variants.
+
            if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
            {
                imm >>= scale; // The immediate is scaled by the size of the ld/st
 
-               fmt = IF_LS_2B;
+               scalarfmt = IF_LS_2B;
            }
            else
            {
@@ -17433,10 +17462,15 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber reg1, int va
        {
            regNumber rsvdReg = codeGen->rsGetRsvdReg();
            codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
-           scalarfmt = IF_LS_3A;
+           scalarfmt = IF_LS_3A;
        }
    }
 
+    // Set the format based on the immediate encoding
+    if (!isScalable)
+    {
+        fmt = scalarfmt;
+    }
     assert(fmt != IF_NONE);
 
     // Try to optimize a load/store with an alternative instruction.
@@ -17564,15 +17598,16 @@ void emitter::emitIns_R_R_S_S(
  *
  *  Add an instruction referencing a stack-based local variable and a register
  */
-void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs)
+void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, insScalableOpts sopt /* = INS_SCALABLE_OPTS_NONE */)
 {
     assert(offs >= 0);
-    emitAttr  size          = EA_SIZE(attr);
-    insFormat fmt           = IF_NONE;
-    int       disp          = 0;
-    unsigned  scale         = 0;
-    bool      isVectorStore = false;
-    bool      isStr         = false;
+    emitAttr  size          = EA_SIZE(attr);
+    insFormat fmt           = IF_NONE;
+    int       disp          = 0;
+    unsigned  scale         = 0;
+    bool      isVectorStore = false;
+    bool      isStr         = false;
+    bool      isScalable    = false;
 
     // TODO-ARM64-CQ: use unscaled loads?
     /* Figure out the encoding format of the instruction */
@@ -17604,6 +17639,31 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
             isStr = true;
             break;
 
+        case INS_sve_str:
+            assert(isVectorRegister(reg1) || isPredicateRegister(reg1));
+            isScalable = true;
+
+            // TODO-SVE: This should probably be set earlier in the caller
+            size = EA_SCALABLE;
+            attr = size;
+
+            // TODO-SVE: Use register number instead of enum
+            if (sopt == INS_SCALABLE_OPTS_UNPREDICATED)
+            {
+                fmt = IF_SVE_JH_2A;
+                // TODO-SVE: Don't assume 128bit vectors
+                scale = NaturalScale_helper(EA_16BYTE);
+            }
+            else
+            {
+                assert(insScalableOptsNone(sopt));
+                fmt = IF_SVE_JG_2A;
+                // TODO-SVE: Don't assume 128bit vectors
+                // Predicate size is vector length / 8
+                scale = NaturalScale_helper(EA_2BYTE);
+            }
+            break;
+
         default:
             NYI("emitIns_S_R"); // FP locals?
             return;
@@ -17617,7 +17677,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
     base = emitComp->lvaFrameAddress(varx, &FPbased);
     disp = base + offs;
     assert(scale >= 0);
-    if (isVectorStore)
+    if (isVectorStore || isScalable)
     {
         assert(scale <= 4);
     }
@@ -17633,15 +17693,16 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
     bool    useRegForImm = false;
     ssize_t imm          = disp;
     ssize_t mask         = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
+    insFormat scalarfmt  = fmt;
     if (imm == 0)
     {
-        fmt = IF_LS_2A;
+        scalarfmt = IF_LS_2A;
     }
     else if ((imm < 0) || ((imm & mask) != 0))
     {
-        if ((imm >= -256) && (imm <= 255))
+        if (isValidSimm9(imm))
         {
-            fmt = IF_LS_2C;
+            scalarfmt = IF_LS_2C;
         }
         else
        {
@@ -17650,11 +17711,12 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
    }
    else if (imm > 0)
    {
+       // TODO: We should be able to scale values <0 for all variants.
+
        if (((imm & mask) == 0) && ((imm >> scale) < 0x1000))
        {
            imm >>= scale; // The immediate is scaled by the size of the ld/st
-
-           fmt = IF_LS_2B;
+           scalarfmt = IF_LS_2B;
        }
        else
        {
@@ -17668,9 +17730,14 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber reg1, int va
        // It is instead implicit when idSetIsLclVar() is set, with this encoding format.
        regNumber rsvdReg = codeGen->rsGetRsvdReg();
        codeGen->instGen_Set_Reg_To_Imm(EA_PTRSIZE, rsvdReg, imm);
-       fmt = IF_LS_3A;
+       scalarfmt = IF_LS_3A;
    }
 
+    // Set the format based on the immediate encoding
+    if (!isScalable)
+    {
+        fmt = scalarfmt;
+    }
     assert(fmt != IF_NONE);
 
     // Try to optimize a store with an alternative instruction.
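
The new INS_sve_ldr / INS_sve_str cases pick both an encoding format and an immediate scale from the transfer size. A small standalone model of that arithmetic, assuming the 128-bit vector length the TODO comments call out; naturalScale below is a stand-in for NaturalScale_helper and the values are only meant to mirror the scale choices in the hunks above:

// sve_frame_offset_scale.cpp -- illustrative model only, not emitter code.
#include <cassert>
#include <cstdio>

// Stand-in for NaturalScale_helper: log2 of the access size in bytes.
static unsigned naturalScale(unsigned sizeInBytes)
{
    unsigned scale = 0;
    while ((1u << scale) < sizeInBytes)
    {
        scale++;
    }
    return scale;
}

int main()
{
    // Assuming a 128-bit (16-byte) vector length:
    unsigned vectorScale    = naturalScale(16); // unpredicated ldr/str of a Z register -> 4
    unsigned predicateScale = naturalScale(2);  // ldr/str of a P register, VL / 8      -> 1
    assert(vectorScale == 4 && predicateScale == 1);

    // The emitter encodes the offset directly only when its low `scale` bits are
    // zero; the immediate is then the offset in register-sized units.
    int disp = 32;                              // frame offset in bytes
    int mask = (1 << vectorScale) - 1;
    assert((disp & mask) == 0);
    std::printf("vector-sized slots: %d, predicate-sized slots: %d\n",
                disp >> vectorScale, disp >> predicateScale);
    return 0;
}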

src/coreclr/jit/emitarm64.h

Lines changed: 2 additions & 2 deletions
@@ -1782,7 +1782,7 @@ void emitIns_C(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int
 
 void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
 
-void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
 
 void emitIns_S_S_R_R(
     instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs);
@@ -1800,7 +1800,7 @@ void emitIns_R_R_R_I_LdStPair(instruction ins,
                               int offs2 = -1 DEBUG_ARG(unsigned var1RefsOffs = BAD_IL_OFFSET)
                                   DEBUG_ARG(unsigned var2RefsOffs = BAD_IL_OFFSET));
 
-void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
+void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insScalableOpts sopt = INS_SCALABLE_OPTS_NONE);
 
 void emitIns_R_R_S_S(
     instruction ins, emitAttr attr, emitAttr attr2, regNumber ireg, regNumber ireg2, int varx, int offs);
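
Because the new trailing parameter defaults to INS_SCALABLE_OPTS_NONE, the declaration change is source-compatible: existing call sites compile unchanged, and only the SVE paths pass the extra argument. A minimal sketch of that pattern with hypothetical names, not the real emitter signature:

// default_arg_compat.cpp -- sketch of the source-compatible signature change.
#include <cstdio>

enum insScalableOpts
{
    INS_SCALABLE_OPTS_NONE,
    INS_SCALABLE_OPTS_UNPREDICATED
};

// Hypothetical stand-in for emitIns_R_S: the trailing parameter defaults,
// so pre-existing call shapes are unaffected.
static void emitIns_R_S_model(int ins, int reg, int varx, int offs,
                              insScalableOpts sopt = INS_SCALABLE_OPTS_NONE)
{
    std::printf("ins=%d reg=%d var=%d offs=%d sopt=%d\n", ins, reg, varx, offs, (int)sopt);
}

int main()
{
    emitIns_R_S_model(0, 1, 2, 0);                                 // existing call shape: sopt defaults to NONE
    emitIns_R_S_model(0, 1, 2, 0, INS_SCALABLE_OPTS_UNPREDICATED); // new SVE call shape
    return 0;
}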

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 11 additions & 4 deletions
@@ -778,7 +778,11 @@ GenTree* Compiler::getArgForHWIntrinsic(var_types argType,
         {
             arg = impSIMDPopStack();
         }
+#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD)
+        assert(varTypeIsSIMD(arg) || varTypeIsMask(arg));
+#else
         assert(varTypeIsSIMD(arg));
+#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD
     }
     else
     {
@@ -1591,13 +1595,16 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
     }
 
 #if defined(TARGET_ARM64)
+
     if (HWIntrinsicInfo::IsMaskedOperation(intrinsic))
     {
-        // Op1 input is a vector. HWInstrinsic requires a mask, so convert to a mask.
         assert(numArgs > 0);
-        GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
-        op1          = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize);
-        retNode->AsHWIntrinsic()->Op(1) = op1;
+        GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
+        if (op1->TypeGet() != TYP_MASK)
+        {
+            // Op1 input is a vector. HWInstrinsic requires a mask.
+            retNode->AsHWIntrinsic()->Op(1) = convertHWIntrinsicToMask(retType, op1, simdBaseJitType, simdSize);
+        }
     }
 
     if (retType != nodeRetType)
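
A compact model of the new control flow in impHWIntrinsic (types and helper names here are simplified stand-ins, not JIT types): the first operand of a masked intrinsic is only wrapped in a vector-to-mask conversion when it is not already a mask, which can now happen when op1 is read from a mask-typed local.

// masked_op1_import.cpp -- illustrative model only, not JIT code.
#include <cstdio>
#include <string>

enum VarType { TYP_SIMD16, TYP_MASK };

struct Node
{
    VarType     type;
    std::string desc;
};

// Stand-in for convertHWIntrinsicToMask(): wraps a vector-typed operand.
static Node convertVectorToMask(const Node& vec)
{
    return Node{TYP_MASK, "ConvertVectorToMask(" + vec.desc + ")"};
}

// Mirrors the new check: only convert when op1 is not already a mask.
static Node importMaskedOp1(const Node& op1)
{
    if (op1.type != TYP_MASK)
    {
        return convertVectorToMask(op1);
    }
    return op1;
}

int main()
{
    Node fromVector = importMaskedOp1(Node{TYP_SIMD16, "vectorLocal"});
    Node fromMask   = importMaskedOp1(Node{TYP_MASK, "maskLocal"});
    std::printf("%s\n%s\n", fromVector.desc.c_str(), fromMask.desc.c_str());
    return 0;
}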

src/coreclr/jit/importer.cpp

Lines changed: 11 additions & 0 deletions
@@ -6419,6 +6419,17 @@ void Compiler::impImportBlockCode(BasicBlock* block)
                     impSpillSideEffects(false, CHECK_SPILL_ALL DEBUGARG("Spill before store to pinned local"));
                 }
 
+#if defined(TARGET_ARM64) && defined(FEATURE_MASKED_SIMD)
+                // Masks must be converted to vectors before being stored to memory.
+                // But, for local stores we can optimise away the conversion
+                if (op1->OperIsHWIntrinsic() && op1->AsHWIntrinsic()->GetHWIntrinsicId() == NI_Sve_ConvertMaskToVector)
+                {
+                    op1                     = op1->AsHWIntrinsic()->Op(1);
+                    lvaTable[lclNum].lvType = TYP_MASK;
+                    lclTyp                  = lvaGetActualType(lclNum);
+                }
+#endif // TARGET_ARM64 && FEATURE_MASKED_SIMD
+
                 op1 = gtNewStoreLclVarNode(lclNum, op1);
 
                 // TODO-ASG: delete this zero-diff quirk. Requires some forward substitution work.
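
This importer hunk is the piece that actually retypes the local: when the value being stored is a ConvertMaskToVector node, the conversion is dropped, the raw mask operand is stored, and the local becomes TYP_MASK so the codegen change above can keep it in a predicate register. A simplified standalone model of that rewrite, with hypothetical types standing in for GenTree and the local-variable table:

// store_mask_local.cpp -- illustrative model only, not importer code.
#include <cassert>
#include <cstdio>

enum VarType { TYP_SIMD16, TYP_MASK };

// Stand-in for the value tree being stored to the local.
struct Value
{
    bool        isConvertMaskToVector;
    const char* wrappedMaskName; // operand of the conversion, if any
    const char* name;
};

struct LocalVar
{
    VarType type = TYP_SIMD16;
};

// Mirrors the importer change: strip the conversion and retype the local.
static const char* importStoreToLocal(Value value, LocalVar& lcl)
{
    if (value.isConvertMaskToVector)
    {
        lcl.type = TYP_MASK;          // the local now holds the mask itself
        return value.wrappedMaskName; // store the mask operand directly
    }
    return value.name;                // ordinary vector store, unchanged
}

int main()
{
    LocalVar    lcl;
    const char* stored = importStoreToLocal(Value{true, "predicate", "ConvertMaskToVector(predicate)"}, lcl);
    assert(lcl.type == TYP_MASK);
    std::printf("stored value: %s\n", stored);
    return 0;
}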

0 commit comments
