Skip to content

Commit f6ace2b

Browse files
[AArch64] Expand vector ops when NEON and SVE are unavailable. (#90833)
Unlike `+noneon`, we must assume that vector types are available, i.e. it is valid to pass/return vector arguments to and from functions. However, the compiler must make sure to scalarize any vector operations.
1 parent fa649df commit f6ace2b

File tree

59 files changed

+49850
-13227
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+49850
-13227
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+54-25
Original file line numberDiff line numberDiff line change
@@ -360,24 +360,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
360360
if (Subtarget->hasNEON()) {
361361
addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
362362
addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
363-
// Someone set us up the NEON.
364-
addDRTypeForNEON(MVT::v2f32);
365-
addDRTypeForNEON(MVT::v8i8);
366-
addDRTypeForNEON(MVT::v4i16);
367-
addDRTypeForNEON(MVT::v2i32);
368-
addDRTypeForNEON(MVT::v1i64);
369-
addDRTypeForNEON(MVT::v1f64);
370-
addDRTypeForNEON(MVT::v4f16);
371-
addDRTypeForNEON(MVT::v4bf16);
372-
373-
addQRTypeForNEON(MVT::v4f32);
374-
addQRTypeForNEON(MVT::v2f64);
375-
addQRTypeForNEON(MVT::v16i8);
376-
addQRTypeForNEON(MVT::v8i16);
377-
addQRTypeForNEON(MVT::v4i32);
378-
addQRTypeForNEON(MVT::v2i64);
379-
addQRTypeForNEON(MVT::v8f16);
380-
addQRTypeForNEON(MVT::v8bf16);
363+
364+
addDRType(MVT::v2f32);
365+
addDRType(MVT::v8i8);
366+
addDRType(MVT::v4i16);
367+
addDRType(MVT::v2i32);
368+
addDRType(MVT::v1i64);
369+
addDRType(MVT::v1f64);
370+
addDRType(MVT::v4f16);
371+
addDRType(MVT::v4bf16);
372+
373+
addQRType(MVT::v4f32);
374+
addQRType(MVT::v2f64);
375+
addQRType(MVT::v16i8);
376+
addQRType(MVT::v8i16);
377+
addQRType(MVT::v4i32);
378+
addQRType(MVT::v2i64);
379+
addQRType(MVT::v8f16);
380+
addQRType(MVT::v8bf16);
381381
}
382382

383383
if (Subtarget->hasSVEorSME()) {
@@ -1125,7 +1125,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
11251125

11261126
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
11271127

1128-
if (Subtarget->hasNEON()) {
1128+
if (Subtarget->isNeonAvailable()) {
11291129
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
11301130
// silliness like this:
11311131
for (auto Op :
@@ -1337,6 +1337,24 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13371337
// FADDP custom lowering
13381338
for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
13391339
setOperationAction(ISD::FADD, VT, Custom);
1340+
} else /* !isNeonAvailable */ {
1341+
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1342+
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1343+
setOperationAction(Op, VT, Expand);
1344+
1345+
if (VT.is128BitVector() || VT.is64BitVector()) {
1346+
setOperationAction(ISD::LOAD, VT, Legal);
1347+
setOperationAction(ISD::STORE, VT, Legal);
1348+
setOperationAction(ISD::BITCAST, VT,
1349+
Subtarget->isLittleEndian() ? Legal : Expand);
1350+
}
1351+
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1352+
setTruncStoreAction(VT, InnerVT, Expand);
1353+
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1354+
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1355+
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1356+
}
1357+
}
13401358
}
13411359

13421360
if (Subtarget->hasSME()) {
@@ -2020,14 +2038,16 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
20202038
setOperationAction(ISD::ZERO_EXTEND, VT, Default);
20212039
}
20222040

2023-
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
2041+
void AArch64TargetLowering::addDRType(MVT VT) {
20242042
addRegisterClass(VT, &AArch64::FPR64RegClass);
2025-
addTypeForNEON(VT);
2043+
if (Subtarget->isNeonAvailable())
2044+
addTypeForNEON(VT);
20262045
}
20272046

2028-
void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
2047+
void AArch64TargetLowering::addQRType(MVT VT) {
20292048
addRegisterClass(VT, &AArch64::FPR128RegClass);
2030-
addTypeForNEON(VT);
2049+
if (Subtarget->isNeonAvailable())
2050+
addTypeForNEON(VT);
20312051
}
20322052

20332053
EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
@@ -9445,7 +9465,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
94459465

94469466
SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
94479467
SelectionDAG &DAG) const {
9448-
if (!Subtarget->hasNEON())
9468+
if (!Subtarget->isNeonAvailable() &&
9469+
!Subtarget->useSVEForFixedLengthVectors())
94499470
return SDValue();
94509471

94519472
EVT VT = Op.getValueType();
@@ -14141,6 +14162,13 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
1414114162
return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
1414214163
}
1414314164

14165+
bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
14166+
EVT VT, unsigned DefinedValues) const {
14167+
if (!Subtarget->isNeonAvailable())
14168+
return false;
14169+
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
14170+
}
14171+
1414414172
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
1414514173
// Currently no fixed length shuffles that require SVE are legal.
1414614174
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
@@ -19838,7 +19866,8 @@ performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
1983819866
// help, for example, to produce ssra from sshr+add.
1983919867
static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG) {
1984019868
EVT VT = N->getValueType(0);
19841-
if (VT != MVT::i64)
19869+
if (VT != MVT::i64 ||
19870+
DAG.getTargetLoweringInfo().isOperationExpand(N->getOpcode(), MVT::v1i64))
1984219871
return SDValue();
1984319872
SDValue Op0 = N->getOperand(0);
1984419873
SDValue Op1 = N->getOperand(1);

llvm/lib/Target/AArch64/AArch64ISelLowering.h

+4-2
Original file line numberDiff line numberDiff line change
@@ -1017,8 +1017,10 @@ class AArch64TargetLowering : public TargetLowering {
10171017

10181018
void addTypeForNEON(MVT VT);
10191019
void addTypeForFixedLengthSVE(MVT VT);
1020-
void addDRTypeForNEON(MVT VT);
1021-
void addQRTypeForNEON(MVT VT);
1020+
void addDRType(MVT VT);
1021+
void addQRType(MVT VT);
1022+
1023+
bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;
10221024

10231025
unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
10241026
SelectionDAG &DAG) const;

0 commit comments

Comments
 (0)