From c69b267eff13a7984e73d77a6b148aeef44d7811 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Fri, 14 Feb 2025 05:37:21 -0800 Subject: [PATCH 1/3] Precommit unpredicated loads/stores tests --- .../AArch64/sve-unpred-loads-stores.ll | 441 ++++++++++++++++++ 1 file changed, 441 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll diff --git a/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll b/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll new file mode 100644 index 0000000000000..d1b8edaf6b9dc --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll @@ -0,0 +1,441 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -aarch64-sve-vector-bits-max=0 < %s | FileCheck %s --check-prefix=CHECK-VLA +; RUN: llc -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128 + +target triple = "aarch64-unknown-linux-gnu" + +define @ld_nxv16i8(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv16i8: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv16i8: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv16i8(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv16i8: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv16i8: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv8i16(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv8i16: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv8i16: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv8i16(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv8i16: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv8i16: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv4i32(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv4i32: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv4i32: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv4i32(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv4i32: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv4i32: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv2i64(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv2i64: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv2i64: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv2i64(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv2i64: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv2i64: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv8f16(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv8f16: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; 
CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv8f16: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv8f16(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv8f16: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv8f16: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv4f32(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv4f32: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv4f32: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv4f32(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv4f32: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv4f32: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv2f64(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv2f64: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv2f64: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ret + %2 = load , ptr %0, align 16 + ret %2 +} + +define void @st_nxv2f64(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv2f64: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv2f64: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: ret + store %1, ptr %0, align 16 + ret void +} + +define @ld_nxv16i8_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv16i8_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv16i8_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv16i8_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv16i8_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv16i8_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +define @ld_nxv8i16_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv8i16_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv8i16_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv8i16_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv8i16_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv8i16_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw 
i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +define @ld_nxv4i32_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv4i32_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv4i32_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv4i32_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv4i32_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv4i32_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +define @ld_nxv2i64_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv2i64_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv2i64_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv2i64_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv2i64_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv2i64_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +define @ld_nxv8f16_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv8f16_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv8f16_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv8f16_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv8f16_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv8f16_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +define @ld_nxv4f32_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv4f32_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv4f32_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv4f32_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv4f32_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; 
CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv4f32_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +define @ld_nxv2f64_offset(ptr %0) #0 { +; CHECK-VLA-LABEL: ld_nxv2f64_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: ld_nxv2f64_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %2 = tail call i64 @llvm.vscale.i64() + %3 = shl nuw nsw i64 %2, 4 + %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 + %5 = load , ptr %4, align 16 + ret %5 +} + +define void @st_nxv2f64_offset(ptr %0, %1) #0 { +; CHECK-VLA-LABEL: st_nxv2f64_offset: +; CHECK-VLA: // %bb.0: +; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] +; CHECK-VLA-NEXT: ret +; +; CHECK-128-LABEL: st_nxv2f64_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: str z0, [x0, #1, mul vl] +; CHECK-128-NEXT: ret + %3 = tail call i64 @llvm.vscale.i64() + %4 = shl nuw nsw i64 %3, 4 + %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 + store %1, ptr %5, align 16 + ret void +} + +attributes #0 = { "target-features"="+sve" } From 4ddc20077ad9c0d4a10d75316e7ce474663efc57 Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Fri, 14 Feb 2025 05:37:32 -0800 Subject: [PATCH 2/3] [AArch64][SVE] Lower unpredicated loads/stores as LDR/STR with sve-vector-bits=128. Given the code below: ```cpp svuint8_t foo(uint8_t *x) { return svld1(svptrue_b8(), x); } ``` When compiled with -msve-vector-bits=128 (or vscale_range(1, 1)), we currently generate: ```gas foo: ptrue p0.b ld1b { z0.b }, p0/z, [x0] ret ``` Whereas (on little-endian) we could instead be using LDR as follows: ```gas foo: ldr q0, [x0] ret ``` Besides avoiding the predicate dependency, the above form enables further optimisations such as LDP folds. Likewise for stores. --- .../Target/AArch64/AArch64ISelLowering.cpp | 51 +++ .../AArch64/sve-unpred-loads-stores.ll | 400 ++---------------- 2 files changed, 95 insertions(+), 356 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0db6c614684d7..600225175e138 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23654,6 +23654,28 @@ static SDValue combineV3I8LoadExt(LoadSDNode *LD, SelectionDAG &DAG) { return DAG.getMergeValues({Extract, TokenFactor}, DL); } +// Replace packed scalable loads with fixed loads when vscale_range(1, 1). +// This enables further optimisations such as LDP folds. 
+static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + EVT MemVT = LD->getMemoryVT(); + if (!DCI.isBeforeLegalize() || !Subtarget->hasNEON() || + !MemVT.isScalableVector() || LD->getExtensionType() != ISD::NON_EXTLOAD || + MemVT.getSizeInBits().getKnownMinValue() != 128 || + Subtarget->getMaxSVEVectorSizeInBits() != 128) + return SDValue(); + + SDLoc DL(LD); + MVT NewVT = MVT::getVectorVT(MemVT.getVectorElementType().getSimpleVT(), + MemVT.getVectorMinNumElements()); + SDValue NewLoad = DAG.getLoad( + NewVT, DL, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); + SDValue Insert = convertToScalableVector(DAG, MemVT, NewLoad); + return DAG.getMergeValues({Insert, SDValue(cast(NewLoad), 1)}, DL); +} + // Perform TBI simplification if supported by the target and try to break up // nontemporal loads larger than 256-bits loads for odd types so LDNPQ 256-bit // load instructions can be selected. @@ -23691,6 +23713,9 @@ static SDValue performLOADCombine(SDNode *N, if (SDValue Res = combineV3I8LoadExt(LD, DAG)) return Res; + if (SDValue Res = combineVScale1Load(LD, DAG, DCI, Subtarget)) + return Res; + if (!LD->isNonTemporal()) return SDValue(N, 0); @@ -23949,6 +23974,29 @@ static SDValue combineI8TruncStore(StoreSDNode *ST, SelectionDAG &DAG, return Chain; } +// Replace packed scalable stores with fixed stores when vscale_range(1, 1). +static SDValue combineVScale1Store(StoreSDNode *ST, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *Subtarget) { + SDValue Value = ST->getValue(); + EVT ValueVT = Value.getValueType(); + if (ST->isVolatile() || !Subtarget->isLittleEndian() || + !DCI.isBeforeLegalize() || !Subtarget->hasNEON() || + !ValueVT.isScalableVector() || ST->isTruncatingStore() || + ValueVT.getSizeInBits().getKnownMinValue() != 128 || + Subtarget->getMaxSVEVectorSizeInBits() != 128) + return SDValue(); + + SDLoc DL(ST); + MVT NewVT = MVT::getVectorVT(ValueVT.getVectorElementType().getSimpleVT(), + ValueVT.getVectorMinNumElements()); + SDValue NewValue = convertFromScalableVector(DAG, NewVT, Value); + SDValue NewStore = DAG.getStore( + ST->getChain(), DL, NewValue, ST->getBasePtr(), ST->getPointerInfo(), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); + return NewStore; +} + static unsigned getFPSubregForVT(EVT VT) { assert(VT.isSimple() && "Expected simple VT"); switch (VT.getSimpleVT().SimpleTy) { @@ -23997,6 +24045,9 @@ static SDValue performSTORECombine(SDNode *N, if (SDValue Res = combineI8TruncStore(ST, DAG, Subtarget)) return Res; + if (SDValue Res = combineVScale1Store(ST, DAG, DCI, Subtarget)) + return Res; + // If this is an FP_ROUND followed by a store, fold this into a truncating // store. We can do this even if this is already a truncstore. 
// We purposefully don't care about legality of the nodes here as we know diff --git a/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll b/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll index d1b8edaf6b9dc..94e23cc2fe6ec 100644 --- a/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll @@ -1,441 +1,129 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -aarch64-sve-vector-bits-max=0 < %s | FileCheck %s --check-prefix=CHECK-VLA -; RUN: llc -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128 +; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | not grep -e ldr -e str -target triple = "aarch64-unknown-linux-gnu" - -define @ld_nxv16i8(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv16i8: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv16i8(ptr %0) { ; CHECK-128-LABEL: ld_nxv16i8: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 1 ret %2 } -define void @st_nxv16i8(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv16i8: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv16i8(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv16i8: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 1 ret void } -define @ld_nxv8i16(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv8i16: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv8i16(ptr %0) { ; CHECK-128-LABEL: ld_nxv8i16: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 2 ret %2 } -define void @st_nxv8i16(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv8i16: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv8i16(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv8i16: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 2 ret void } -define @ld_nxv4i32(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv4i32: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv4i32(ptr %0) { ; CHECK-128-LABEL: ld_nxv4i32: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 4 ret %2 } -define void @st_nxv4i32(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv4i32: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv4i32(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv4i32: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 4 ret void } -define @ld_nxv2i64(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv2i64: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv2i64(ptr %0) { ; CHECK-128-LABEL: ld_nxv2i64: ; CHECK-128: // %bb.0: -; 
CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 8 ret %2 } -define void @st_nxv2i64(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv2i64: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv2i64(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv2i64: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 8 ret void } -define @ld_nxv8f16(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv8f16: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv8f16(ptr %0) { ; CHECK-128-LABEL: ld_nxv8f16: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 2 ret %2 } -define void @st_nxv8f16(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv8f16: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv8f16(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv8f16: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 2 ret void } -define @ld_nxv4f32(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv4f32: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv4f32(ptr %0) { ; CHECK-128-LABEL: ld_nxv4f32: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 4 ret %2 } -define void @st_nxv4f32(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv4f32: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv4f32(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv4f32: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 4 ret void } -define @ld_nxv2f64(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv2f64: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0] -; CHECK-VLA-NEXT: ret -; +define @ld_nxv2f64(ptr %0) { ; CHECK-128-LABEL: ld_nxv2f64: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0] +; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret - %2 = load , ptr %0, align 16 + %2 = load , ptr %0, align 8 ret %2 } -define void @st_nxv2f64(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv2f64: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0] -; CHECK-VLA-NEXT: ret -; +define void @st_nxv2f64(ptr %0, %1) { ; CHECK-128-LABEL: st_nxv2f64: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0] +; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret - store %1, ptr %0, align 16 + store %1, ptr %0, align 8 ret void } - -define @ld_nxv16i8_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv16i8_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv16i8_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv16i8_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv16i8_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] 
-; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv16i8_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -define @ld_nxv8i16_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv8i16_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv8i16_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv8i16_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv8i16_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv8i16_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -define @ld_nxv4i32_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv4i32_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv4i32_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv4i32_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv4i32_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv4i32_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -define @ld_nxv2i64_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv2i64_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv2i64_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv2i64_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv2i64_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv2i64_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -define @ld_nxv8f16_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv8f16_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv8f16_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = 
getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv8f16_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv8f16_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv8f16_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -define @ld_nxv4f32_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv4f32_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv4f32_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv4f32_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv4f32_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv4f32_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -define @ld_nxv2f64_offset(ptr %0) #0 { -; CHECK-VLA-LABEL: ld_nxv2f64_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: ld_nxv2f64_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %2 = tail call i64 @llvm.vscale.i64() - %3 = shl nuw nsw i64 %2, 4 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 %3 - %5 = load , ptr %4, align 16 - ret %5 -} - -define void @st_nxv2f64_offset(ptr %0, %1) #0 { -; CHECK-VLA-LABEL: st_nxv2f64_offset: -; CHECK-VLA: // %bb.0: -; CHECK-VLA-NEXT: str z0, [x0, #1, mul vl] -; CHECK-VLA-NEXT: ret -; -; CHECK-128-LABEL: st_nxv2f64_offset: -; CHECK-128: // %bb.0: -; CHECK-128-NEXT: str z0, [x0, #1, mul vl] -; CHECK-128-NEXT: ret - %3 = tail call i64 @llvm.vscale.i64() - %4 = shl nuw nsw i64 %3, 4 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 %4 - store %1, ptr %5, align 16 - ret void -} - -attributes #0 = { "target-features"="+sve" } From 70568c98d3000ea08d354aad8abd31efd29914dd Mon Sep 17 00:00:00 2001 From: Ricardo Jesus Date: Thu, 6 Mar 2025 08:30:33 -0800 Subject: [PATCH 3/3] Address comments and rebase patch --- .../Target/AArch64/AArch64ISelLowering.cpp | 27 +++-- .../AArch64/sve-fixed-length-offsets.ll | 16 +-- .../AArch64/sve-unpred-loads-stores.ll | 112 +++++++++++++++++- 3 files changed, 133 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 600225175e138..c7de92f843c7e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23660,20 +23660,21 @@ static SDValue combineVScale1Load(LoadSDNode *LD, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { EVT MemVT = LD->getMemoryVT(); - if (!DCI.isBeforeLegalize() || !Subtarget->hasNEON() || - !MemVT.isScalableVector() || LD->getExtensionType() != ISD::NON_EXTLOAD || + if (!DCI.isBeforeLegalize() || 
!Subtarget->isLittleEndian() || + !Subtarget->hasNEON() || !MemVT.isScalableVector() || + LD->getExtensionType() != ISD::NON_EXTLOAD || MemVT.getSizeInBits().getKnownMinValue() != 128 || Subtarget->getMaxSVEVectorSizeInBits() != 128) return SDValue(); SDLoc DL(LD); - MVT NewVT = MVT::getVectorVT(MemVT.getVectorElementType().getSimpleVT(), + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), MemVT.getVectorMinNumElements()); SDValue NewLoad = DAG.getLoad( NewVT, DL, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); SDValue Insert = convertToScalableVector(DAG, MemVT, NewLoad); - return DAG.getMergeValues({Insert, SDValue(cast(NewLoad), 1)}, DL); + return DAG.getMergeValues({Insert, NewLoad.getValue(1)}, DL); } // Perform TBI simplification if supported by the target and try to break up @@ -23980,21 +23981,21 @@ static SDValue combineVScale1Store(StoreSDNode *ST, SelectionDAG &DAG, const AArch64Subtarget *Subtarget) { SDValue Value = ST->getValue(); EVT ValueVT = Value.getValueType(); - if (ST->isVolatile() || !Subtarget->isLittleEndian() || - !DCI.isBeforeLegalize() || !Subtarget->hasNEON() || - !ValueVT.isScalableVector() || ST->isTruncatingStore() || + if (!DCI.isBeforeLegalize() || !Subtarget->isLittleEndian() || + !Subtarget->hasNEON() || !ValueVT.isScalableVector() || + ST->isTruncatingStore() || ValueVT.getSizeInBits().getKnownMinValue() != 128 || Subtarget->getMaxSVEVectorSizeInBits() != 128) return SDValue(); SDLoc DL(ST); - MVT NewVT = MVT::getVectorVT(ValueVT.getVectorElementType().getSimpleVT(), - ValueVT.getVectorMinNumElements()); + EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), + ValueVT.getVectorMinNumElements()); SDValue NewValue = convertFromScalableVector(DAG, NewVT, Value); - SDValue NewStore = DAG.getStore( - ST->getChain(), DL, NewValue, ST->getBasePtr(), ST->getPointerInfo(), - ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); - return NewStore; + return DAG.getStore(ST->getChain(), DL, NewValue, ST->getBasePtr(), + ST->getPointerInfo(), ST->getOriginalAlign(), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); } static unsigned getFPSubregForVT(EVT VT) { diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll index d7b67d73a671e..8aba77d365d6e 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll @@ -17,8 +17,8 @@ define void @nxv16i8(ptr %ldptr, ptr %stptr) { ; ; CHECK-128-LABEL: nxv16i8: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl] -; CHECK-128-NEXT: str z0, [x1, #16, mul vl] +; CHECK-128-NEXT: ldr q0, [x0, #256] +; CHECK-128-NEXT: str q0, [x1, #256] ; CHECK-128-NEXT: ret ; ; CHECK-256-LABEL: nxv16i8: @@ -62,8 +62,8 @@ define void @nxv8i16(ptr %ldptr, ptr %stptr) { ; ; CHECK-128-LABEL: nxv8i16: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl] -; CHECK-128-NEXT: str z0, [x1, #16, mul vl] +; CHECK-128-NEXT: ldr q0, [x0, #256] +; CHECK-128-NEXT: str q0, [x1, #256] ; CHECK-128-NEXT: ret ; ; CHECK-256-LABEL: nxv8i16: @@ -107,8 +107,8 @@ define void @nxv4i32(ptr %ldptr, ptr %stptr) { ; ; CHECK-128-LABEL: nxv4i32: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl] -; CHECK-128-NEXT: str z0, [x1, #16, mul vl] +; CHECK-128-NEXT: ldr q0, [x0, #256] +; CHECK-128-NEXT: str q0, [x1, #256] ; CHECK-128-NEXT: ret ; ; 
CHECK-256-LABEL: nxv4i32: @@ -152,8 +152,8 @@ define void @nxv2i64(ptr %ldptr, ptr %stptr) { ; ; CHECK-128-LABEL: nxv2i64: ; CHECK-128: // %bb.0: -; CHECK-128-NEXT: ldr z0, [x0, #16, mul vl] -; CHECK-128-NEXT: str z0, [x1, #16, mul vl] +; CHECK-128-NEXT: ldr q0, [x0, #256] +; CHECK-128-NEXT: str q0, [x1, #256] ; CHECK-128-NEXT: ret ; ; CHECK-256-LABEL: nxv2i64: diff --git a/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll b/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll index 94e23cc2fe6ec..4d6ee892c7f49 100644 --- a/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-unpred-loads-stores.ll @@ -1,12 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-128 -; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | not grep -e ldr -e str +; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s --check-prefix=CHECK-BE-128 define @ld_nxv16i8(ptr %0) { ; CHECK-128-LABEL: ld_nxv16i8: ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv16i8: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.b +; CHECK-BE-128-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 1 ret %2 } @@ -16,6 +22,12 @@ define void @st_nxv16i8(ptr %0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv16i8: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.b +; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 1 ret void } @@ -25,6 +37,12 @@ define @ld_nxv8i16(ptr %0) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv8i16: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.h +; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 2 ret %2 } @@ -34,6 +52,12 @@ define void @st_nxv8i16(ptr %0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv8i16: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.h +; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 2 ret void } @@ -43,6 +67,12 @@ define @ld_nxv4i32(ptr %0) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv4i32: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.s +; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 4 ret %2 } @@ -52,6 +82,12 @@ define void @st_nxv4i32(ptr %0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv4i32: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.s +; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 4 ret void } @@ -61,6 +97,12 @@ define @ld_nxv2i64(ptr %0) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv2i64: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.d +; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 8 ret %2 } @@ -70,6 +112,12 @@ define void @st_nxv2i64(ptr 
%0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv2i64: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.d +; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 8 ret void } @@ -79,6 +127,12 @@ define @ld_nxv8f16(ptr %0) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv8f16: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.h +; CHECK-BE-128-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 2 ret %2 } @@ -88,6 +142,12 @@ define void @st_nxv8f16(ptr %0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv8f16: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.h +; CHECK-BE-128-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 2 ret void } @@ -97,6 +157,12 @@ define @ld_nxv4f32(ptr %0) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv4f32: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.s +; CHECK-BE-128-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 4 ret %2 } @@ -106,6 +172,12 @@ define void @st_nxv4f32(ptr %0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv4f32: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.s +; CHECK-BE-128-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 4 ret void } @@ -115,6 +187,12 @@ define @ld_nxv2f64(ptr %0) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: ldr q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ld_nxv2f64: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.d +; CHECK-BE-128-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-BE-128-NEXT: ret %2 = load , ptr %0, align 8 ret %2 } @@ -124,6 +202,38 @@ define void @st_nxv2f64(ptr %0, %1) { ; CHECK-128: // %bb.0: ; CHECK-128-NEXT: str q0, [x0] ; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: st_nxv2f64: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.d +; CHECK-BE-128-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-BE-128-NEXT: ret store %1, ptr %0, align 8 ret void } + +; Test LDP/STP fold. +define void @ldp_stp_nxv16i8_offset(ptr %ldptr, ptr %stptr) { +; CHECK-128-LABEL: ldp_stp_nxv16i8_offset: +; CHECK-128: // %bb.0: +; CHECK-128-NEXT: ldp q0, q1, [x0, #-16] +; CHECK-128-NEXT: stp q0, q1, [x1, #-16] +; CHECK-128-NEXT: ret +; +; CHECK-BE-128-LABEL: ldp_stp_nxv16i8_offset: +; CHECK-BE-128: // %bb.0: +; CHECK-BE-128-NEXT: ptrue p0.b +; CHECK-BE-128-NEXT: mov x8, #-16 // =0xfffffffffffffff0 +; CHECK-BE-128-NEXT: ld1b { z0.b }, p0/z, [x0, x8] +; CHECK-BE-128-NEXT: ld1b { z1.b }, p0/z, [x0] +; CHECK-BE-128-NEXT: st1b { z0.b }, p0, [x1, x8] +; CHECK-BE-128-NEXT: st1b { z1.b }, p0, [x1] +; CHECK-BE-128-NEXT: ret + %ldptr.1 = getelementptr inbounds i8, ptr %ldptr, i64 -16 + %ld1 = load , ptr %ldptr.1, align 1 + %ld2 = load , ptr %ldptr, align 1 + %stptr.1 = getelementptr inbounds i8, ptr %stptr, i64 -16 + store %ld1, ptr %stptr.1, align 1 + store %ld2, ptr %stptr, align 1 + ret void +}
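
A quick way to sanity-check the new lowering locally is to run llc on a small standalone module. The snippet below is illustrative only (the file and function names are made up); the llc flags mirror the RUN lines of sve-unpred-loads-stores.ll above. On little-endian the load/store pair should now come out as `ldr q0`/`str q0` (and can fold into LDP/STP when accesses are adjacent, as in the ldp_stp_nxv16i8_offset test), whereas the big-endian target keeps the predicated `ld1b`/`st1b` form.

```llvm
; repro.ll -- assumed invocation:
;   llc -mtriple=aarch64-linux-gnu -mattr=+sve -aarch64-sve-vector-bits-max=128 repro.ll -o -
define void @copy_vl128(ptr %src, ptr %dst) {
  ; With the maximum SVE vector size capped at 128 bits, this unpredicated
  ; scalable load/store has a known size of 128 bits, so the combine rewrites
  ; it as a fixed-width v16i8 access.
  %v = load <vscale x 16 x i8>, ptr %src, align 1
  store <vscale x 16 x i8> %v, ptr %dst, align 1
  ret void
}
```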