Skip to content

Commit 671bc7c

Browse files
committed
Fixup
1 parent 89b12b3 commit 671bc7c

File tree

3 files changed

+73
-41
lines changed

3 files changed

+73
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -5456,8 +5456,8 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
54565456
for (Type *VectorTy : getContainedTypes(WideTy)) {
54575457
ScalarCost += TTI.getScalarizationOverhead(
54585458
cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getFixedValue()),
5459-
/*Insert*/ true,
5460-
/*Extract*/ false, CostKind);
5459+
/*Insert=*/true,
5460+
/*Extract=*/false, CostKind);
54615461
}
54625462
ScalarCost +=
54635463
VF.getFixedValue() * TTI.getCFInstrCost(Instruction::PHI, CostKind);
@@ -5954,8 +5954,8 @@ InstructionCost LoopVectorizationCostModel::getScalarizationOverhead(
59545954
for (Type *VectorTy : getContainedTypes(RetTy)) {
59555955
Cost += TTI.getScalarizationOverhead(
59565956
cast<VectorType>(VectorTy), APInt::getAllOnes(VF.getKnownMinValue()),
5957-
/*Insert*/ true,
5958-
/*Extract*/ false, CostKind);
5957+
/*Insert=*/true,
5958+
/*Extract=*/false, CostKind);
59595959
}
59605960
}
59615961

llvm/lib/Transforms/Vectorize/VPlan.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,6 @@ void VPTransformState::packScalarIntoWideValue(VPValue *Def,
414414
WideValue = Builder.CreateInsertValue(WideValue, VectorValue, I);
415415
}
416416
} else {
417-
assert(WideValue->getType()->isVectorTy() && "expected vector type!");
418417
WideValue = Builder.CreateInsertElement(WideValue, ScalarInst, LaneExpr);
419418
}
420419
set(Def, WideValue);

llvm/test/Transforms/LoopVectorize/AArch64/struct-return.ll

+69-36
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@ entry:
2929
for.body:
3030
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
3131
%arrayidx = getelementptr inbounds float, ptr %in, i64 %indvars.iv
32-
%0 = load float, ptr %arrayidx, align 4
33-
%call = tail call { float, float } @foo(float %0) #0
34-
%1 = extractvalue { float, float } %call, 0
35-
%2 = extractvalue { float, float } %call, 1
32+
%in_val = load float, ptr %arrayidx, align 4
33+
%call = tail call { float, float } @foo(float %in_val) #0
34+
%extract_a = extractvalue { float, float } %call, 0
35+
%extract_b = extractvalue { float, float } %call, 1
3636
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %indvars.iv
37-
store float %1, ptr %arrayidx2, align 4
37+
store float %extract_a, ptr %arrayidx2, align 4
3838
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %indvars.iv
39-
store float %2, ptr %arrayidx4, align 4
39+
store float %extract_b, ptr %arrayidx4, align 4
4040
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
4141
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
4242
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -69,14 +69,14 @@ entry:
6969
for.body:
7070
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
7171
%arrayidx = getelementptr inbounds double, ptr %in, i64 %indvars.iv
72-
%0 = load double, ptr %arrayidx, align 8
73-
%call = tail call { double, double } @bar(double %0) #1
74-
%1 = extractvalue { double, double } %call, 0
75-
%2 = extractvalue { double, double } %call, 1
72+
%in_val = load double, ptr %arrayidx, align 8
73+
%call = tail call { double, double } @bar(double %in_val) #1
74+
%extract_a = extractvalue { double, double } %call, 0
75+
%extract_b = extractvalue { double, double } %call, 1
7676
%arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %indvars.iv
77-
store double %1, ptr %arrayidx2, align 8
77+
store double %extract_a, ptr %arrayidx2, align 8
7878
%arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %indvars.iv
79-
store double %2, ptr %arrayidx4, align 8
79+
store double %extract_b, ptr %arrayidx4, align 8
8080
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
8181
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
8282
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -107,15 +107,15 @@ entry:
107107
for.body:
108108
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
109109
%arrayidx = getelementptr inbounds float, ptr %in, i64 %indvars.iv
110-
%0 = load float, ptr %arrayidx, align 4
110+
%in_val = load float, ptr %arrayidx, align 4
111111
; #3 does not have a fixed-size vector mapping (so replication is used)
112-
%call = tail call { float, float } @foo(float %0) #3
113-
%1 = extractvalue { float, float } %call, 0
114-
%2 = extractvalue { float, float } %call, 1
112+
%call = tail call { float, float } @foo(float %in_val) #3
113+
%extract_a = extractvalue { float, float } %call, 0
114+
%extract_b = extractvalue { float, float } %call, 1
115115
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %indvars.iv
116-
store float %1, ptr %arrayidx2, align 4
116+
store float %extract_a, ptr %arrayidx2, align 4
117117
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %indvars.iv
118-
store float %2, ptr %arrayidx4, align 4
118+
store float %extract_b, ptr %arrayidx4, align 4
119119
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
120120
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
121121
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -150,14 +150,14 @@ entry:
150150
for.body:
151151
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
152152
%arrayidx = getelementptr inbounds float, ptr %in, i64 %indvars.iv
153-
%0 = load float, ptr %arrayidx, align 4
154-
%call = tail call { float, float } @foo(float %0) #0
155-
%1 = extractvalue { float, float } %call, 0
156-
%2 = extractvalue { float, float } %call, 1
153+
%in_val = load float, ptr %arrayidx, align 4
154+
%call = tail call { float, float } @foo(float %in_val) #0
155+
%extract_a = extractvalue { float, float } %call, 0
156+
%extract_b = extractvalue { float, float } %call, 1
157157
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %indvars.iv
158-
store float %1, ptr %arrayidx2, align 4
158+
store float %extract_a, ptr %arrayidx2, align 4
159159
%arrayidx4 = getelementptr inbounds float, ptr %out_b, i64 %indvars.iv
160-
store float %2, ptr %arrayidx4, align 4
160+
store float %extract_b, ptr %arrayidx4, align 4
161161
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
162162
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
163163
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -181,14 +181,14 @@ entry:
181181
for.body:
182182
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
183183
%arrayidx = getelementptr inbounds float, ptr %in, i64 %indvars.iv
184-
%0 = load float, ptr %arrayidx, align 4
185-
%call = tail call { float, i32 } @baz(float %0) #2
186-
%1 = extractvalue { float, i32 } %call, 0
187-
%2 = extractvalue { float, i32 } %call, 1
184+
%in_val = load float, ptr %arrayidx, align 4
185+
%call = tail call { float, i32 } @baz(float %in_val) #2
186+
%extract_a = extractvalue { float, i32 } %call, 0
187+
%extract_b = extractvalue { float, i32 } %call, 1
188188
%arrayidx2 = getelementptr inbounds float, ptr %out_a, i64 %indvars.iv
189-
store float %1, ptr %arrayidx2, align 4
189+
store float %extract_a, ptr %arrayidx2, align 4
190190
%arrayidx4 = getelementptr inbounds i32, ptr %out_b, i64 %indvars.iv
191-
store i32 %2, ptr %arrayidx4, align 4
191+
store i32 %extract_b, ptr %arrayidx4, align 4
192192
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
193193
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
194194
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
@@ -214,14 +214,47 @@ entry:
214214
for.body:
215215
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
216216
%arrayidx = getelementptr inbounds double, ptr %in, i64 %indvars.iv
217-
%0 = load double, ptr %arrayidx, align 8
218-
%call = tail call %named_struct @bar_named(double %0) #4
219-
%1 = extractvalue %named_struct %call, 0
220-
%2 = extractvalue %named_struct %call, 1
217+
%in_val = load double, ptr %arrayidx, align 8
218+
%call = tail call %named_struct @bar_named(double %in_val) #4
219+
%extract_a = extractvalue %named_struct %call, 0
220+
%extract_b = extractvalue %named_struct %call, 1
221221
%arrayidx2 = getelementptr inbounds double, ptr %out_a, i64 %indvars.iv
222-
store double %1, ptr %arrayidx2, align 8
222+
store double %extract_a, ptr %arrayidx2, align 8
223223
%arrayidx4 = getelementptr inbounds double, ptr %out_b, i64 %indvars.iv
224-
store double %2, ptr %arrayidx4, align 8
224+
store double %extract_b, ptr %arrayidx4, align 8
225+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
226+
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
227+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
228+
229+
for.cond.cleanup:
230+
ret void
231+
}
232+
233+
; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
234+
define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
235+
; NEON-LABEL: define void @test_overflow_intrinsic
236+
; NEON-NOT: vector.body:
237+
; NEON-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
238+
;
239+
; SVE_TF-LABEL: define void @test_overflow_intrinsic
240+
; SVE_TF-NOT: vector.body:
241+
; SVE_TF-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
242+
; SVE_TF-NOT: @llvm.sadd.with.overflow.nxv{{.+}}i32
243+
entry:
244+
br label %for.body
245+
246+
for.body:
247+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
248+
%arrayidx = getelementptr inbounds float, ptr %in, i64 %indvars.iv
249+
%in_val = load i32, ptr %arrayidx, align 4
250+
%call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
251+
%extract_ret = extractvalue { i32, i1 } %call, 0
252+
%extract_overflow = extractvalue { i32, i1 } %call, 1
253+
%zext_overflow = zext i1 %extract_overflow to i8
254+
%arrayidx2 = getelementptr inbounds i32, ptr %out_a, i64 %indvars.iv
255+
store i32 %extract_ret, ptr %arrayidx2, align 4
256+
%arrayidx4 = getelementptr inbounds i8, ptr %out_b, i64 %indvars.iv
257+
store i8 %zext_overflow, ptr %arrayidx4, align 4
225258
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
226259
%exitcond.not = icmp eq i64 %indvars.iv.next, 1024
227260
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

0 commit comments

Comments
 (0)