Skip to content

Commit 7889937

Browse files
arm64: Implement missing SVE extract methods (#116095)
* arm64: Implement missing SVE extract methods
* Fixes #115336
* Remove asserts & align intrinsic definitions
* Remove HasRMWSemantics from ExtractAfterLastActiveElementScalar
1 parent 3c77442 commit 7889937

File tree

10 files changed

+1494
-1
lines changed

10 files changed

+1494
-1
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -363,3 +363,8 @@ src/coreclr/System.Private.CoreLib/common
363363
.dotnet-daily/
364364
run-stress-*
365365
test:.cs
366+
367+
# XUnit
368+
*.tempLog.xml
369+
*.testResults.xml
370+
*.testStats.csv

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3284,6 +3284,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
32843284
break;
32853285
}
32863286

3287+
case NI_Sve_ExtractAfterLastActiveElementScalar:
3288+
case NI_Sve_ExtractLastActiveElementScalar:
3289+
{
3290+
assert(sig->numArgs == 2);
3291+
3292+
#ifdef DEBUG
3293+
isValidScalarIntrinsic = true;
3294+
#endif
3295+
3296+
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
3297+
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
3298+
var_types argType = TYP_UNKNOWN;
3299+
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
3300+
3301+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
3302+
op2 = getArgForHWIntrinsic(argType, argClass);
3303+
CorInfoType op2BaseJitType = getBaseJitTypeOfSIMDType(argClass);
3304+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
3305+
op1 = getArgForHWIntrinsic(argType, argClass);
3306+
3307+
retNode = gtNewScalarHWIntrinsicNode(retType, op1, op2, intrinsic);
3308+
3309+
retNode->AsHWIntrinsic()->SetSimdBaseJitType(simdBaseJitType);
3310+
break;
3311+
}
3312+
32873313
case NI_Sve_MultiplyAddRotateComplexBySelectedScalar:
32883314
{
32893315
assert(sig->numArgs == 5);

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2577,6 +2577,35 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
25772577
break;
25782578
}
25792579

2580+
case NI_Sve_ExtractAfterLastActiveElementScalar:
2581+
case NI_Sve_ExtractLastActiveElementScalar:
2582+
{
2583+
opt = emitter::optGetSveInsOpt(emitTypeSize(node->GetSimdBaseType()));
2584+
2585+
if (emitter::isGeneralRegisterOrZR(targetReg))
2586+
{
2587+
assert(varTypeIsIntegralOrI(intrin.baseType));
2588+
2589+
emitSize = emitTypeSize(node);
2590+
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt,
2591+
INS_SCALABLE_OPTS_NONE);
2592+
break;
2593+
}
2594+
2595+
// FP scalars are processed by the INS_SCALABLE_OPTS_WITH_SIMD_SCALAR variant of the instructions
2596+
FALLTHROUGH;
2597+
}
2598+
case NI_Sve_ExtractAfterLastActiveElement:
2599+
case NI_Sve_ExtractLastActiveElement:
2600+
{
2601+
assert(emitter::isFloatReg(targetReg));
2602+
assert(varTypeIsFloating(node->gtType) || varTypeIsSIMD(node->gtType));
2603+
2604+
GetEmitter()->emitInsSve_R_R_R(ins, EA_SCALABLE, targetReg, op1Reg, op2Reg, opt,
2605+
INS_SCALABLE_OPTS_WITH_SIMD_SCALAR);
2606+
break;
2607+
}
2608+
25802609
case NI_Sve_TrigonometricMultiplyAddCoefficient:
25812610
{
25822611
assert(isRMW);

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,10 @@ HARDWARE_INTRINSIC(Sve, Divide,
9797
HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
9898
HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation)
9999
HARDWARE_INTRINSIC(Sve, DuplicateSelectedScalarToVector, -1, 2, {INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup, INS_sve_dup}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand)
100+
HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElement, -1, 2, {INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
101+
HARDWARE_INTRINSIC(Sve, ExtractAfterLastActiveElementScalar, 0, 2, {INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta, INS_sve_lasta}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
102+
HARDWARE_INTRINSIC(Sve, ExtractLastActiveElement, -1, 2, {INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen)
103+
HARDWARE_INTRINSIC(Sve, ExtractLastActiveElementScalar, 0, 2, {INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb, INS_sve_lastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
100104
HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, {INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
101105
HARDWARE_INTRINSIC(Sve, FloatingPointExponentialAccelerator, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fexpa, INS_invalid, INS_sve_fexpa, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
102106
HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)

0 commit comments

Comments
 (0)