Skip to content

Commit cfff02f

Browse files
committed
[LV] Teach LoopVectorizationLegality about struct vector calls
This is a split-off from llvm#109833 and only adds code relating to checking if a struct-returning call can be vectorized. This initial patch only allows the case where all users of the struct return are `extractvalue` operations that can be widened.

```
%call = tail call { float, float } @foo(float %in_val) #0
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
```

Note: The tests require the VFABI changes from llvm#119000 to pass.
1 parent f9d6d46 commit cfff02f

File tree

7 files changed

+418
-2
lines changed

7 files changed

+418
-2
lines changed

llvm/include/llvm/Analysis/VectorUtils.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ inline Type *ToVectorTy(Type *Scalar, unsigned VF) {
140140
return ToVectorTy(Scalar, ElementCount::getFixed(VF));
141141
}
142142

143+
/// Returns true if the call return type `Ty` can be widened by the loop
144+
/// vectorizer.
145+
bool canWidenCallReturnType(Type *Ty);
146+
143147
/// Identify if the intrinsic is trivially vectorizable.
144148
/// This method returns true if the intrinsic's argument types are all scalars
145149
/// for the scalar form of the intrinsic and all vectors (or scalars handled by

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,10 @@ class LoopVectorizationLegality {
417417
/// has a vectorized variant available.
418418
bool hasVectorCallVariants() const { return VecCallVariantsFound; }
419419

420+
/// Returns true if there is at least one function call in the loop which
421+
/// returns a struct type and needs to be vectorized.
422+
bool hasStructVectorCall() const { return StructVecVecCallFound; }
423+
420424
unsigned getNumStores() const { return LAI->getNumStores(); }
421425
unsigned getNumLoads() const { return LAI->getNumLoads(); }
422426

@@ -639,6 +643,12 @@ class LoopVectorizationLegality {
639643
/// the use of those function variants.
640644
bool VecCallVariantsFound = false;
641645

646+
/// If we find a call (to be vectorized) that returns a struct type, record
647+
/// that so we can bail out until this is supported.
648+
/// TODO: Remove this flag once vectorizing calls with struct returns is
649+
/// supported.
650+
bool StructVecVecCallFound = false;
651+
642652
/// Indicates whether this loop has an uncountable early exit, i.e. an
643653
/// uncountable exiting block that is not the latch.
644654
bool HasUncountableEarlyExit = false;

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,21 @@ static cl::opt<unsigned> MaxInterleaveGroupFactor(
3939
cl::desc("Maximum factor for an interleaved access group (default = 8)"),
4040
cl::init(8));
4141

42+
/// Returns true if the call return type `Ty` can be widened by the loop
43+
/// vectorizer.
44+
bool llvm::canWidenCallReturnType(Type *Ty) {
45+
Type *ElTy = Ty;
46+
// For now, only allow widening non-packed literal structs where all
47+
// element types are the same. This simplifies the cost model and
48+
// conversion between scalar and wide types.
49+
if (auto *StructTy = dyn_cast<StructType>(Ty);
50+
StructTy && !StructTy->isPacked() && StructTy->isLiteral() &&
51+
StructTy->containsHomogeneousTypes()) {
52+
ElTy = StructTy->elements().front();
53+
}
54+
return VectorType::isValidElementType(ElTy);
55+
}
56+
4257
/// Return true if all of the intrinsic's arguments and return type are scalars
4358
/// for the scalar form of the intrinsic, and vectors for the vector form of the
4459
/// intrinsic (except operands that are marked as always being scalar by

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -943,11 +943,24 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
943943
if (CI && !VFDatabase::getMappings(*CI).empty())
944944
VecCallVariantsFound = true;
945945

946+
auto canWidenInstruction = [this](Instruction const &Inst) {
947+
Type *InstTy = Inst.getType();
948+
if (isa<CallInst>(Inst) && isa<StructType>(InstTy) &&
949+
canWidenCallReturnType(InstTy)) {
950+
StructVecVecCallFound = true;
951+
// For now, we can only widen struct values returned from calls where
952+
// all users are extractvalue instructions.
953+
return llvm::all_of(Inst.uses(), [](auto &Use) {
954+
return isa<ExtractValueInst>(Use.getUser());
955+
});
956+
}
957+
return VectorType::isValidElementType(InstTy) || InstTy->isVoidTy();
958+
};
959+
946960
// Check that the instruction return type is vectorizable.
947961
// We can't vectorize casts from vector type to scalar type.
948962
// Also, we can't vectorize extractelement instructions.
949-
if ((!VectorType::isValidElementType(I.getType()) &&
950-
!I.getType()->isVoidTy()) ||
963+
if (!canWidenInstruction(I) ||
951964
(isa<CastInst>(I) &&
952965
!VectorType::isValidElementType(I.getOperand(0)->getType())) ||
953966
isa<ExtractElementInst>(I)) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10004,6 +10004,15 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1000410004
return false;
1000510005
}
1000610006

10007+
if (LVL.hasStructVectorCall()) {
10008+
constexpr StringLiteral FailureMessage(
10009+
"Auto-vectorization of calls that return struct types is not yet "
10010+
"supported");
10011+
reportVectorizationFailure(FailureMessage, FailureMessage,
10012+
"StructCallVectorizationUnsupported", ORE, L);
10013+
return false;
10014+
}
10015+
1000710016
// Entrance to the VPlan-native vectorization path. Outer loops are processed
1000810017
// here. They may require CFG and instruction level transformations before
1000910018
// even evaluating whether vectorization is profitable. Since we cannot modify
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S | FileCheck %s
2+
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -pass-remarks-analysis=loop-vectorize -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Tests basic vectorization of scalable homogeneous struct literal returns.
7+
8+
; TODO: Support vectorization in this case.
9+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
10+
define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
11+
; CHECK-LABEL: define void @struct_return_f32_widen
12+
; CHECK-NOT: vector.body:
13+
entry:
14+
br label %for.body
15+
16+
for.body:
17+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
18+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
19+
%in_val = load float, ptr %arrayidx, align 4
20+
%call = tail call { float, float } @foo(float %in_val) #0
21+
%extract_a = extractvalue { float, float } %call, 0
22+
%extract_b = extractvalue { float, float } %call, 1
23+
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
24+
store float %extract_a, ptr %arrayidx2, align 4
25+
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
26+
store float %extract_b, ptr %arrayidx4, align 4
27+
%iv.next = add nuw nsw i64 %iv, 1
28+
%exitcond.not = icmp eq i64 %iv.next, 1024
29+
br i1 %exitcond.not, label %exit, label %for.body
30+
31+
exit:
32+
ret void
33+
}
34+
35+
; TODO: Support vectorization in this case.
36+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
37+
define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
38+
; CHECK-LABEL: define void @struct_return_f64_widen
39+
; CHECK-NOT: vector.body:
40+
entry:
41+
br label %for.body
42+
43+
for.body:
44+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
45+
%arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
46+
%in_val = load double, ptr %arrayidx, align 8
47+
%call = tail call { double, double } @bar(double %in_val) #1
48+
%extract_a = extractvalue { double, double } %call, 0
49+
%extract_b = extractvalue { double, double } %call, 1
50+
%arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
51+
store double %extract_a, ptr %arrayidx2, align 8
52+
%arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
53+
store double %extract_b, ptr %arrayidx4, align 8
54+
%iv.next = add nuw nsw i64 %iv, 1
55+
%exitcond.not = icmp eq i64 %iv.next, 1024
56+
br i1 %exitcond.not, label %exit, label %for.body
57+
58+
exit:
59+
ret void
60+
}
61+
62+
; TODO: Support vectorization in this case.
63+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
64+
define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
65+
; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
66+
; CHECK-NOT: vector.body:
67+
entry:
68+
br label %for.body
69+
70+
for.body:
71+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
72+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
73+
%in_val = load float, ptr %arrayidx, align 4
74+
%call = tail call { float, float } @foo(float %in_val) #0
75+
%extract_a = extractvalue { float, float } %call, 0
76+
%extract_b = extractvalue { float, float } %call, 1
77+
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
78+
store float %extract_a, ptr %arrayidx2, align 4
79+
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
80+
store float %extract_b, ptr %arrayidx4, align 4
81+
%iv.next = add nuw nsw i64 %iv, 1
82+
%exitcond.not = icmp eq i64 %iv.next, 1024
83+
br i1 %exitcond.not, label %exit, label %for.body
84+
85+
exit:
86+
ret void
87+
}
88+
89+
declare { float, float } @foo(float)
90+
declare { double, double } @bar(double)
91+
92+
declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
93+
declare { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double>, <vscale x 2 x i1>)
94+
95+
96+
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
97+
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar(scalable_vec_masked_bar)" }

0 commit comments

Comments
 (0)