Skip to content

Commit 24335e4

Browse files
committed
[RLEV] Add coverage for expansion of umin EVL idiom
1 parent 0ea6b8e commit 24335e4

File tree

1 file changed

+104
-1
lines changed

1 file changed

+104
-1
lines changed

llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll

Lines changed: 104 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -passes='loop(indvars),instcombine' -S < %s | FileCheck %s
2+
; RUN: opt -passes='loop(indvars),instcombine' -replexitval=always -S < %s | FileCheck %s
33

44
;; Test that loop's exit value is rewritten to its initial
55
;; value from loop preheader
@@ -197,3 +197,106 @@ crit_edge:
197197
ret i16 %conv
198198
}
199199

200+
;; Loop whose live-out value is umin(%VF, %n - %i), where %VF = vscale << 2
;; and %i advances by %VF each iteration. The loop is only entered when
;; %n > 0 (signed); otherwise the function returns 0. The value computed in
;; the final iteration is returned from for.end, and the autogenerated
;; assertions below expect that exit value to be recomputed in for.end as
;;   umin(VF, n - (((n - 1) udiv VF) * vscale) << 2)
;; rather than taken from the in-loop umin.
;; (Presumably this models the EVL computation of a VP-style vectorized
;; loop, going by the function name -- confirm against the commit message.)
define i32 @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
201+
; CHECK-LABEL: @vscale_slt_with_vp_umin(
202+
; CHECK-NEXT: entry:
203+
; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
204+
; CHECK-NEXT: [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2
205+
; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
206+
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]]
207+
; CHECK: for.body.preheader:
208+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
209+
; CHECK: early.exit:
210+
; CHECK-NEXT: ret i32 0
211+
; CHECK: for.body:
212+
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
213+
; CHECK-NEXT: [[LEFT:%.*]] = sub nsw i32 [[N]], [[I_05]]
214+
; CHECK-NEXT: [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]])
215+
; CHECK-NEXT: store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4
216+
; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]]
217+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
218+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
219+
; CHECK: for.end:
220+
; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N]], -1
221+
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
222+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
223+
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2
224+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
225+
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
226+
; CHECK-NEXT: ret i32 [[UMIN]]
227+
;
228+
entry:
;; %VF = vscale * 4 is the per-iteration step; skip the loop unless %n > 0.
229+
%vscale = call i32 @llvm.vscale.i32()
230+
%VF = shl nuw nsw i32 %vscale, 2
231+
%cmp4 = icmp sgt i32 %n, 0
232+
br i1 %cmp4, label %for.body, label %early.exit
233+
234+
early.exit:
235+
ret i32 0
236+
237+
for.body:
238+
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
;; Review note: %arrayidx is computed but never used; the store below writes
;; through %A directly, and the expected output contains no GEP.
239+
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
;; %left = elements remaining; %VF.capped = umin(%VF, %left) is the value
;; that is live out of the loop.
240+
%left = sub i32 %n, %i.05
241+
%VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
242+
store i32 %VF.capped, ptr %A
243+
244+
;; Advance by %VF and keep looping while the next index is below %n (signed).
%add = add nsw i32 %i.05, %VF
245+
%cmp = icmp slt i32 %add, %n
246+
br i1 %cmp, label %for.body, label %for.end
247+
248+
for.end:
;; Sole use of the loop-computed value outside the loop.
249+
ret i32 %VF.capped
250+
}
251+
252+
;; Same loop shape as a umin(%VF, %n - %i) live-out with step
;; %VF = vscale << 2, but here the loop is only entered when %n > %VF
;; (signed) instead of %n > 0; otherwise the function returns 0. The
;; autogenerated assertions below expect the exit value in for.end to be
;; recomputed as
;;   umin(VF, n - (((n - 1) udiv VF) * vscale) << 2)
;; and, with this guard, the expected in-loop sub and the expected
;; add of -1 in for.end carry no nsw flag.
define i32 @vscale_slt_with_vp_umin2(ptr nocapture %A, i32 %n) mustprogress vscale_range(2,1024) {
253+
; CHECK-LABEL: @vscale_slt_with_vp_umin2(
254+
; CHECK-NEXT: entry:
255+
; CHECK-NEXT: [[VSCALE:%.*]] = call i32 @llvm.vscale.i32()
256+
; CHECK-NEXT: [[VF:%.*]] = shl nuw nsw i32 [[VSCALE]], 2
257+
; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[VF]], [[N:%.*]]
258+
; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[EARLY_EXIT:%.*]]
259+
; CHECK: for.body.preheader:
260+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
261+
; CHECK: early.exit:
262+
; CHECK-NEXT: ret i32 0
263+
; CHECK: for.body:
264+
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
265+
; CHECK-NEXT: [[LEFT:%.*]] = sub i32 [[N]], [[I_05]]
266+
; CHECK-NEXT: [[VF_CAPPED:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[LEFT]])
267+
; CHECK-NEXT: store i32 [[VF_CAPPED]], ptr [[A:%.*]], align 4
268+
; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I_05]], [[VF]]
269+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[ADD]], [[N]]
270+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
271+
; CHECK: for.end:
272+
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1
273+
; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[TMP0]], [[VF]]
274+
; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], [[VSCALE]]
275+
; CHECK-NEXT: [[TMP3:%.*]] = shl i32 [[TMP2]], 2
276+
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[N]], [[TMP3]]
277+
; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[VF]], i32 [[TMP4]])
278+
; CHECK-NEXT: ret i32 [[UMIN]]
279+
;
280+
entry:
;; %VF = vscale * 4 is the per-iteration step; skip the loop unless %n > %VF.
281+
%vscale = call i32 @llvm.vscale.i32()
282+
%VF = shl nuw nsw i32 %vscale, 2
283+
%cmp4 = icmp sgt i32 %n, %VF
284+
br i1 %cmp4, label %for.body, label %early.exit
285+
286+
early.exit:
287+
ret i32 0
288+
289+
for.body:
290+
%i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
;; Review note: %arrayidx is computed but never used; the store below writes
;; through %A directly, and the expected output contains no GEP.
291+
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
;; %left = elements remaining; %VF.capped = umin(%VF, %left) is the value
;; that is live out of the loop.
292+
%left = sub i32 %n, %i.05
293+
%VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
294+
store i32 %VF.capped, ptr %A
295+
296+
;; Advance by %VF and keep looping while the next index is below %n (signed).
%add = add nsw i32 %i.05, %VF
297+
%cmp = icmp slt i32 %add, %n
298+
br i1 %cmp, label %for.body, label %for.end
299+
300+
for.end:
;; Sole use of the loop-computed value outside the loop.
301+
ret i32 %VF.capped
302+
}

0 commit comments

Comments
 (0)