Skip to content

Commit f88ef1b

Browse files
authored
[LV] Teach LoopVectorizationLegality about struct vector calls (#119221)
This is a split-off from #109833 and only adds code relating to checking if a struct-returning call can be vectorized. This initial patch only allows the case where all users of the struct return are `extractvalue` operations that can be widened.

```
%call = tail call { float, float } @foo(float %in_val)
%extract_a = extractvalue { float, float } %call, 0
%extract_b = extractvalue { float, float } %call, 1
```

Note: The tests require the VFABI changes from #119000 to pass.
1 parent f07b10b commit f88ef1b

File tree

7 files changed

+576
-2
lines changed

7 files changed

+576
-2
lines changed

llvm/include/llvm/IR/VectorTypeUtils.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ Type *toScalarizedStructTy(StructType *StructTy);
4040
/// are vectors of matching element count. This does not include empty structs.
4141
bool isVectorizedStructTy(StructType *StructTy);
4242

43+
/// Returns true if `StructTy` is an unpacked literal struct where all elements
44+
/// are scalars that can be used as vector element types.
45+
bool canVectorizeStructTy(StructType *StructTy);
46+
4347
/// A helper for converting to vectorized types. For scalar types, this is
4448
/// equivalent to calling `toVectorTy`. For struct types, this returns a new
4549
/// struct where each element type has been widened to a vector type.
@@ -71,6 +75,18 @@ inline bool isVectorizedTy(Type *Ty) {
7175
return Ty->isVectorTy();
7276
}
7377

78+
/// Returns true if `Ty` is a valid vector element type, void, or an unpacked
79+
/// literal struct where all elements are valid vector element types.
80+
/// Note: Even if a type can be vectorized that does not mean it is valid to do
81+
/// so in all cases. For example, a vectorized struct (as returned by
82+
/// toVectorizedTy) does not perform (de)interleaving, so it can't be used for
83+
/// vectorizing loads/stores.
84+
inline bool canVectorizeTy(Type *Ty) {
85+
if (StructType *StructTy = dyn_cast<StructType>(Ty))
86+
return canVectorizeStructTy(StructTy);
87+
return Ty->isVoidTy() || VectorType::isValidElementType(Ty);
88+
}
89+
7490
/// Returns the types contained in `Ty`. For struct types, it returns the
7591
/// elements, all other types are returned directly.
7692
inline ArrayRef<Type *> getContainedTypes(Type *const &Ty) {

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,10 @@ class LoopVectorizationLegality {
422422
/// has a vectorized variant available.
423423
bool hasVectorCallVariants() const { return VecCallVariantsFound; }
424424

425+
/// Returns true if there is at least one function call in the loop which
426+
/// returns a struct type and needs to be vectorized.
427+
bool hasStructVectorCall() const { return StructVecCallFound; }
428+
425429
unsigned getNumStores() const { return LAI->getNumStores(); }
426430
unsigned getNumLoads() const { return LAI->getNumLoads(); }
427431

@@ -644,6 +648,12 @@ class LoopVectorizationLegality {
644648
/// the use of those function variants.
645649
bool VecCallVariantsFound = false;
646650

651+
/// If we find a call (to be vectorized) that returns a struct type, record
652+
/// that so we can bail out until this is supported.
653+
/// TODO: Remove this flag once vectorizing calls with struct returns is
654+
/// supported.
655+
bool StructVecCallFound = false;
656+
647657
/// Indicates whether this loop has an uncountable early exit, i.e. an
648658
/// uncountable exiting block that is not the latch.
649659
bool HasUncountableEarlyExit = false;

llvm/lib/IR/VectorTypeUtils.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,11 @@ bool llvm::isVectorizedStructTy(StructType *StructTy) {
5252
return Ty->isVectorTy() && cast<VectorType>(Ty)->getElementCount() == VF;
5353
});
5454
}
55+
56+
/// Returns true if `StructTy` is an unpacked literal struct where all elements
57+
/// are scalars that can be used as vector element types.
58+
bool llvm::canVectorizeStructTy(StructType *StructTy) {
59+
auto ElemTys = StructTy->elements();
60+
return !ElemTys.empty() && isUnpackedStructLiteral(StructTy) &&
61+
all_of(ElemTys, VectorType::isValidElementType);
62+
}

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,18 @@ static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
778778
return Scalarize;
779779
}
780780

781+
/// Returns true if the call return type `Ty` can be widened by the loop
782+
/// vectorizer.
783+
static bool canWidenCallReturnType(Type *Ty) {
784+
auto *StructTy = dyn_cast<StructType>(Ty);
785+
// TODO: Remove the homogeneous types restriction. This is just an initial
786+
// simplification. When we want to support things like the overflow intrinsics
787+
// we will have to lift this restriction.
788+
if (StructTy && !StructTy->containsHomogeneousTypes())
789+
return false;
790+
return canVectorizeTy(StructTy);
791+
}
792+
781793
bool LoopVectorizationLegality::canVectorizeInstrs() {
782794
BasicBlock *Header = TheLoop->getHeader();
783795

@@ -942,11 +954,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
942954
if (CI && !VFDatabase::getMappings(*CI).empty())
943955
VecCallVariantsFound = true;
944956

957+
auto CanWidenInstructionTy = [this](Instruction const &Inst) {
958+
Type *InstTy = Inst.getType();
959+
if (!isa<StructType>(InstTy))
960+
return canVectorizeTy(InstTy);
961+
962+
// For now, we only recognize struct values returned from calls where
963+
// all users are extractvalue as vectorizable. All element types of the
964+
// struct must be types that can be widened.
965+
if (isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
966+
all_of(Inst.users(), IsaPred<ExtractValueInst>)) {
967+
// TODO: Remove the `StructVecCallFound` flag once vectorizing calls
968+
// with struct returns is supported.
969+
StructVecCallFound = true;
970+
return true;
971+
}
972+
973+
return false;
974+
};
975+
945976
// Check that the instruction return type is vectorizable.
946977
// We can't vectorize casts from vector type to scalar type.
947978
// Also, we can't vectorize extractelement instructions.
948-
if ((!VectorType::isValidElementType(I.getType()) &&
949-
!I.getType()->isVoidTy()) ||
979+
if (!CanWidenInstructionTy(I) ||
950980
(isa<CastInst>(I) &&
951981
!VectorType::isValidElementType(I.getOperand(0)->getType())) ||
952982
isa<ExtractElementInst>(I)) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10353,6 +10353,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1035310353
return false;
1035410354
}
1035510355

10356+
if (LVL.hasStructVectorCall()) {
10357+
reportVectorizationFailure("Auto-vectorization of calls that return struct "
10358+
"types is not yet supported",
10359+
"StructCallVectorizationUnsupported", ORE, L);
10360+
return false;
10361+
}
10362+
1035610363
// Entrance to the VPlan-native vectorization path. Outer loops are processed
1035710364
// here. They may require CFG and instruction level transformations before
1035810365
// even evaluating whether vectorization is profitable. Since we cannot modify
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; RUN: opt < %s -mattr=+sve -passes=loop-vectorize -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s
2+
; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Tests basic vectorization of scalable homogeneous struct literal returns.
7+
8+
; TODO: Support vectorization in this case.
9+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
10+
define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
11+
; CHECK-LABEL: define void @struct_return_f32_widen
12+
; CHECK-NOT: vector.body:
13+
entry:
14+
br label %for.body
15+
16+
for.body:
17+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
18+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
19+
%in_val = load float, ptr %arrayidx, align 4
20+
%call = tail call { float, float } @foo(float %in_val) #0
21+
%extract_a = extractvalue { float, float } %call, 0
22+
%extract_b = extractvalue { float, float } %call, 1
23+
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
24+
store float %extract_a, ptr %arrayidx2, align 4
25+
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
26+
store float %extract_b, ptr %arrayidx4, align 4
27+
%iv.next = add nuw nsw i64 %iv, 1
28+
%exitcond.not = icmp eq i64 %iv.next, 1024
29+
br i1 %exitcond.not, label %exit, label %for.body
30+
31+
exit:
32+
ret void
33+
}
34+
35+
; TODO: Support vectorization in this case.
36+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
37+
define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
38+
; CHECK-LABEL: define void @struct_return_f64_widen
39+
; CHECK-NOT: vector.body:
40+
entry:
41+
br label %for.body
42+
43+
for.body:
44+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
45+
%arrayidx = getelementptr inbounds double, ptr %in, i64 %iv
46+
%in_val = load double, ptr %arrayidx, align 8
47+
%call = tail call { double, double } @bar(double %in_val) #1
48+
%extract_a = extractvalue { double, double } %call, 0
49+
%extract_b = extractvalue { double, double } %call, 1
50+
%arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %iv
51+
store double %extract_a, ptr %arrayidx2, align 8
52+
%arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %iv
53+
store double %extract_b, ptr %arrayidx4, align 8
54+
%iv.next = add nuw nsw i64 %iv, 1
55+
%exitcond.not = icmp eq i64 %iv.next, 1024
56+
br i1 %exitcond.not, label %exit, label %for.body
57+
58+
exit:
59+
ret void
60+
}
61+
62+
; TODO: Support vectorization in this case.
63+
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
64+
define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
65+
; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
66+
; CHECK-NOT: vector.body:
67+
entry:
68+
br label %for.body
69+
70+
for.body:
71+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
72+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %iv
73+
%in_val = load float, ptr %arrayidx, align 4
74+
%call = tail call { float, float } @foo(float %in_val) #0
75+
%extract_a = extractvalue { float, float } %call, 0
76+
%extract_b = extractvalue { float, float } %call, 1
77+
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %iv
78+
store float %extract_a, ptr %arrayidx2, align 4
79+
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %iv
80+
store float %extract_b, ptr %arrayidx4, align 4
81+
%iv.next = add nuw nsw i64 %iv, 1
82+
%exitcond.not = icmp eq i64 %iv.next, 1024
83+
br i1 %exitcond.not, label %exit, label %for.body
84+
85+
exit:
86+
ret void
87+
}
88+
89+
declare { float, float } @foo(float)
90+
declare { double, double } @bar(double)
91+
92+
declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)
93+
declare { <vscale x 2 x double>, <vscale x 2 x double> } @scalable_vec_masked_bar(<vscale x 2 x double>, <vscale x 2 x i1>)
94+
95+
96+
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
97+
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_bar(scalable_vec_masked_bar)" }

0 commit comments

Comments
 (0)