Skip to content

Commit a635d7a

Browse files
committed
[AArch64] Disable consecutive store merging when Neon is unavailable
Lowering fixed-size BUILD_VECTORs without Neon may introduce stack spills, leading to more stores/reloads than if the stores were not merged. In some cases, it can also prevent the use of paired store instructions. In the future, we may want to relax this restriction when SVE is available, but currently the SVE lowerings for BUILD_VECTOR are limited to a few specific cases.
1 parent b0e3b9d commit a635d7a

File tree

3 files changed

+23
-26
lines changed

3 files changed

+23
-26
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27924,6 +27924,23 @@ bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
2792427924
return OptSize && !VT.isVector();
2792527925
}
2792627926

27927+
bool AArch64TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
27928+
const MachineFunction &MF) const {
27929+
// Avoid merging stores into fixed-length vectors when Neon is unavailable.
27930+
// Until we have more general SVE lowerings for BUILD_VECTOR this may
27931+
// introduce stack spills.
27932+
if (MemVT.isFixedLengthVector() && !Subtarget->isNeonAvailable())
27933+
return false;
27934+
27935+
// Do not merge to float value size (128 bytes) if no implicit
27936+
// float attribute is set.
27937+
bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
27938+
27939+
if (NoFloat)
27940+
return (MemVT.getSizeInBits() <= 64);
27941+
return true;
27942+
}
27943+
2792727944
bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
2792827945
// We want inc-of-add for scalars and sub-of-not for vectors.
2792927946
return VT.isScalarInteger();

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -849,16 +849,7 @@ class AArch64TargetLowering : public TargetLowering {
849849
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
850850

851851
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
852-
const MachineFunction &MF) const override {
853-
// Do not merge to float value size (128 bytes) if no implicit
854-
// float attribute is set.
855-
856-
bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);
857-
858-
if (NoFloat)
859-
return (MemVT.getSizeInBits() <= 64);
860-
return true;
861-
}
852+
const MachineFunction &MF) const override;
862853

863854
bool isCheapToSpeculateCttz(Type *) const override {
864855
return true;

llvm/test/CodeGen/AArch64/consecutive-stores-of-faddv.ll

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,10 @@ define void @consecutive_stores_quadruple(ptr %dest0,
5555
define void @consecutive_stores_pair_streaming_function(ptr %dest0, <vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1) "aarch64_pstate_sm_enabled" {
5656
; CHECK-LABEL: consecutive_stores_pair_streaming_function:
5757
; CHECK: // %bb.0:
58-
; CHECK-NEXT: sub sp, sp, #16
59-
; CHECK-NEXT: .cfi_def_cfa_offset 16
6058
; CHECK-NEXT: ptrue p0.s
61-
; CHECK-NEXT: faddv s1, p0, z1.s
6259
; CHECK-NEXT: faddv s0, p0, z0.s
63-
; CHECK-NEXT: stp s0, s1, [sp, #8]
64-
; CHECK-NEXT: ldr d0, [sp, #8]
65-
; CHECK-NEXT: str d0, [x0]
66-
; CHECK-NEXT: add sp, sp, #16
60+
; CHECK-NEXT: faddv s1, p0, z1.s
61+
; CHECK-NEXT: stp s0, s1, [x0]
6762
; CHECK-NEXT: ret
6863
%dest1 = getelementptr inbounds i8, ptr %dest0, i64 4
6964
%reduce0 = call float @llvm.aarch64.sve.faddv.nxv4f32(<vscale x 4 x i1> splat(i1 true), <vscale x 4 x float> %vec0)
@@ -79,16 +74,10 @@ define void @consecutive_stores_quadruple_streaming_function(ptr %dest0,
7974
; CHECK-NEXT: ptrue p0.s
8075
; CHECK-NEXT: faddv s0, p0, z0.s
8176
; CHECK-NEXT: faddv s1, p0, z1.s
82-
; CHECK-NEXT: faddv s3, p0, z3.s
8377
; CHECK-NEXT: faddv s2, p0, z2.s
84-
; CHECK-NEXT: stp s0, s1, [sp, #-16]!
85-
; CHECK-NEXT: .cfi_def_cfa_offset 16
86-
; CHECK-NEXT: ldr d0, [sp]
87-
; CHECK-NEXT: str d0, [x0]
88-
; CHECK-NEXT: stp s2, s3, [sp, #8]
89-
; CHECK-NEXT: ldr d0, [sp, #8]
90-
; CHECK-NEXT: str d0, [x0, #8]
91-
; CHECK-NEXT: add sp, sp, #16
78+
; CHECK-NEXT: stp s0, s1, [x0]
79+
; CHECK-NEXT: faddv s3, p0, z3.s
80+
; CHECK-NEXT: stp s2, s3, [x0, #8]
9281
; CHECK-NEXT: ret
9382
<vscale x 4 x float> %vec0, <vscale x 4 x float> %vec1, <vscale x 4 x float> %vec2, <vscale x 4 x float> %vec3) "aarch64_pstate_sm_enabled"
9483
{

0 commit comments

Comments
 (0)