Skip to content

Commit c86774f

Browse files
Implement SVE AddCarryWidening (Upper/Lower) (#116429)
* SVE2 API for AddCarryWideningLower
* SVE2 API for AddCarryWideningUpper
* Adding bounds checks in AddCarry helper functions
* Handling RMW semantics on 3rd operand in lsra and codegen
* Fixing formatting
* Updating array validation in helper functions

  Change-Id: I9dd753d27af6041f96792ad2958a5cc4f0093ca4
* adding parentheses to subexpressions

  Change-Id: Ibb95a18ad5abffae2a4b215a31412395c750ba89
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs
* Update src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs

---------

Co-authored-by: Kunal Pathak <[email protected]>
1 parent 542b947 commit c86774f

File tree

10 files changed

+253
-1
lines changed

10 files changed

+253
-1
lines changed

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2685,6 +2685,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
26852685
break;
26862686
}
26872687

2688+
case NI_Sve2_AddCarryWideningLower:
2689+
case NI_Sve2_AddCarryWideningUpper:
2690+
if (targetReg != op3Reg)
2691+
{
2692+
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op3Reg, /* canSkip */ true);
2693+
}
2694+
GetEmitter()->emitInsSve_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
2695+
break;
2696+
26882697
case NI_Sve2_BitwiseClearXor:
26892698
case NI_Sve2_Xor:
26902699
if (targetReg != op1Reg)

src/coreclr/jit/hwintrinsiclistarm64sve.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@ HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningLower,
318318
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceAddWideningUpper, -1, 3, {INS_invalid, INS_invalid, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_sve_sabalt, INS_sve_uabalt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasRMWSemantics)
319319
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningLower, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_sve_sabdlb, INS_sve_uabdlb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
320320
HARDWARE_INTRINSIC(Sve2, AbsoluteDifferenceWideningUpper, -1, 2, {INS_invalid, INS_invalid, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_sve_sabdlt, INS_sve_uabdlt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable)
321+
HARDWARE_INTRINSIC(Sve2, AddCarryWideningLower, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclb, INS_invalid, INS_sve_adclb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
322+
HARDWARE_INTRINSIC(Sve2, AddCarryWideningUpper, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adclt, INS_invalid, INS_sve_adclt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
321323
HARDWARE_INTRINSIC(Sve2, BitwiseClearXor, -1, 3, {INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_sve_bcax, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
322324
HARDWARE_INTRINSIC(Sve2, BitwiseSelect, -1, 3, {INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_sve_bsl, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)
323325
HARDWARE_INTRINSIC(Sve2, BitwiseSelectLeftInverted, -1, 3, {INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_sve_bsl1n, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics)

src/coreclr/jit/lsra.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,6 +1935,7 @@ class LinearScan : public LinearScanInterface
19351935
// 'tgtPrefUse' to that RefPosition.
19361936
RefPosition* tgtPrefUse = nullptr;
19371937
RefPosition* tgtPrefUse2 = nullptr;
1938+
RefPosition* tgtPrefUse3 = nullptr;
19381939

19391940
public:
19401941
// The following keep track of information about internal (temporary register) intervals
@@ -1957,6 +1958,7 @@ class LinearScan : public LinearScanInterface
19571958
{
19581959
tgtPrefUse = nullptr;
19591960
tgtPrefUse2 = nullptr;
1961+
tgtPrefUse3 = nullptr;
19601962
internalCount = 0;
19611963
setInternalRegsDelayFree = false;
19621964
pendingDelayFree = false;

src/coreclr/jit/lsraarm64.cpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1464,15 +1464,25 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
14641464
{
14651465
assert(tgtPrefUse == nullptr);
14661466
assert(tgtPrefUse2 == nullptr);
1467+
assert(tgtPrefUse3 == nullptr);
14671468
tgtPrefUse = delayUse;
14681469
}
1469-
else
1470+
else if (opNum == 2)
14701471
{
14711472
assert(opNum == 2);
14721473
assert(tgtPrefUse == nullptr);
14731474
assert(tgtPrefUse2 == nullptr);
1475+
assert(tgtPrefUse3 == nullptr);
14741476
tgtPrefUse2 = delayUse;
14751477
}
1478+
else
1479+
{
1480+
assert(opNum == 3);
1481+
assert(tgtPrefUse == nullptr);
1482+
assert(tgtPrefUse2 == nullptr);
1483+
assert(tgtPrefUse3 == nullptr);
1484+
tgtPrefUse3 = delayUse;
1485+
}
14761486
}
14771487
}
14781488
else if (containedCselOp == operand)
@@ -2292,6 +2302,14 @@ GenTree* LinearScan::getDelayFreeOperand(GenTreeHWIntrinsic* intrinsicTree, bool
22922302
assert(delayFreeOp != nullptr);
22932303
break;
22942304

2305+
case NI_Sve2_AddCarryWideningLower:
2306+
case NI_Sve2_AddCarryWideningUpper:
2307+
// RMW operates on the third op.
2308+
assert(isRMW);
2309+
delayFreeOp = intrinsicTree->Op(3);
2310+
assert(delayFreeOp != nullptr);
2311+
break;
2312+
22952313
default:
22962314
if (isRMW)
22972315
{

src/coreclr/jit/lsrabuild.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3062,6 +3062,7 @@ RefPosition* LinearScan::BuildDef(GenTree* tree, SingleTypeRegSet dstCandidates,
30623062
#ifndef TARGET_ARM
30633063
setTgtPref(interval, tgtPrefUse);
30643064
setTgtPref(interval, tgtPrefUse2);
3065+
setTgtPref(interval, tgtPrefUse3);
30653066
#endif // !TARGET_ARM
30663067

30673068
#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.PlatformNotSupported.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,34 @@ internal Arm64() { }
230230
/// </summary>
231231
public static Vector<ulong> AbsoluteDifferenceWideningUpper(Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }
232232

233+
// Add with carry long (bottom)
234+
235+
/// <summary>
236+
/// svuint32_t svadclb[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
237+
/// ADCLB Ztied1.S, Zop2.S, Zop3.S
238+
/// </summary>
239+
public static unsafe Vector<uint> AddCarryWideningLower(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) { throw new PlatformNotSupportedException(); }
240+
241+
/// <summary>
242+
/// svuint64_t svadclb[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
243+
/// ADCLB Ztied1.D, Zop2.D, Zop3.D
244+
/// </summary>
245+
public static unsafe Vector<ulong> AddCarryWideningLower(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) { throw new PlatformNotSupportedException(); }
246+
247+
// Add with carry long (top)
248+
249+
/// <summary>
250+
/// svuint32_t svadclt[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
251+
/// ADCLT Ztied1.S, Zop2.S, Zop3.S
252+
/// </summary>
253+
public static unsafe Vector<uint> AddCarryWideningUpper(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) { throw new PlatformNotSupportedException(); }
254+
255+
/// <summary>
256+
/// svuint64_t svadclt[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
257+
/// ADCLT Ztied1.D, Zop2.D, Zop3.D
258+
/// </summary>
259+
public static unsafe Vector<ulong> AddCarryWideningUpper(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) { throw new PlatformNotSupportedException(); }
260+
233261
// Bitwise clear and exclusive OR
234262

235263
/// <summary>

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve2.cs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,34 @@ internal Arm64() { }
230230
/// </summary>
231231
public static Vector<ulong> AbsoluteDifferenceWideningUpper(Vector<uint> left, Vector<uint> right) => AbsoluteDifferenceWideningUpper(left, right);
232232

233+
// Add with carry long (bottom)
234+
235+
/// <summary>
236+
/// svuint32_t svadclb[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
237+
/// ADCLB Ztied1.S, Zop2.S, Zop3.S
238+
/// </summary>
239+
public static unsafe Vector<uint> AddCarryWideningLower(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) => AddCarryWideningLower(op1, op2, op3);
240+
241+
/// <summary>
242+
/// svuint64_t svadclb[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
243+
/// ADCLB Ztied1.D, Zop2.D, Zop3.D
244+
/// </summary>
245+
public static unsafe Vector<ulong> AddCarryWideningLower(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) => AddCarryWideningLower(op1, op2, op3);
246+
247+
// Add with carry long (top)
248+
249+
/// <summary>
250+
/// svuint32_t svadclt[_u32](svuint32_t op1, svuint32_t op2, svuint32_t op3)
251+
/// ADCLT Ztied1.S, Zop2.S, Zop3.S
252+
/// </summary>
253+
public static unsafe Vector<uint> AddCarryWideningUpper(Vector<uint> op1, Vector<uint> op2, Vector<uint> op3) => AddCarryWideningUpper(op1, op2, op3);
254+
255+
/// <summary>
256+
/// svuint64_t svadclt[_u64](svuint64_t op1, svuint64_t op2, svuint64_t op3)
257+
/// ADCLT Ztied1.D, Zop2.D, Zop3.D
258+
/// </summary>
259+
public static unsafe Vector<ulong> AddCarryWideningUpper(Vector<ulong> op1, Vector<ulong> op2, Vector<ulong> op3) => AddCarryWideningUpper(op1, op2, op3);
260+
233261
// Bitwise clear and exclusive OR
234262

235263
/// <summary>

src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6116,6 +6116,10 @@ internal Arm64() { }
61166116
public static System.Numerics.Vector<uint> AbsoluteDifferenceWideningUpper(System.Numerics.Vector<ushort> left, System.Numerics.Vector<ushort> right) { throw null; }
61176117
public static System.Numerics.Vector<ulong> AbsoluteDifferenceWideningUpper(System.Numerics.Vector<uint> left, System.Numerics.Vector<uint> right) { throw null; }
61186118

6119+
public static System.Numerics.Vector<uint> AddCarryWideningLower(System.Numerics.Vector<uint> op1, System.Numerics.Vector<uint> op2, System.Numerics.Vector<uint> op3) { throw null; }
6120+
public static System.Numerics.Vector<ulong> AddCarryWideningLower(System.Numerics.Vector<ulong> op1, System.Numerics.Vector<ulong> op2, System.Numerics.Vector<ulong> op3) { throw null; }
6121+
public static System.Numerics.Vector<uint> AddCarryWideningUpper(System.Numerics.Vector<uint> op1, System.Numerics.Vector<uint> op2, System.Numerics.Vector<uint> op3) { throw null; }
6122+
public static System.Numerics.Vector<ulong> AddCarryWideningUpper(System.Numerics.Vector<ulong> op1, System.Numerics.Vector<ulong> op2, System.Numerics.Vector<ulong> op3) { throw null; }
61196123
public static System.Numerics.Vector<byte> BitwiseClearXor(System.Numerics.Vector<byte> xor, System.Numerics.Vector<byte> value, System.Numerics.Vector<byte> mask) { throw null; }
61206124
public static System.Numerics.Vector<short> BitwiseClearXor(System.Numerics.Vector<short> xor, System.Numerics.Vector<short> value, System.Numerics.Vector<short> mask) { throw null; }
61216125
public static System.Numerics.Vector<int> BitwiseClearXor(System.Numerics.Vector<int> xor, System.Numerics.Vector<int> value, System.Numerics.Vector<int> mask) { throw null; }

0 commit comments

Comments
 (0)