Skip to content

Commit 6b93bd0

Browse files
committed
Add tests
1 parent 671976f commit 6b93bd0

File tree

2 files changed

+263
-0
lines changed

2 files changed

+263
-0
lines changed
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
; Test: a 4-wide loop whose <4 x i64> induction vector is compared (icmp ult)
; against a splat of 15. Each lane of the resulting <4 x i1> is extracted in
; turn and guards a scalar "store i32 1" into %dest at index, index|1, index|2
; and index|3. %index advances by 4 and the loop exits once it equals 16.
;
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; In the expected output the cmhi/xtn/uzp1 sequence appears once per extracted
; lane (four times per iteration) although the IR contains a single icmp.
; NOTE(review): presumably this is the baseline the test was added to capture;
; confirm against the change this test accompanies.
define void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
7+
; CHECK-LABEL: vector_loop_with_icmp:
8+
; CHECK: // %bb.0: // %entry
9+
; CHECK-NEXT: mov w8, #15 // =0xf
10+
; CHECK-NEXT: mov w10, #4 // =0x4
11+
; CHECK-NEXT: adrp x9, .LCPI0_0
12+
; CHECK-NEXT: adrp x11, .LCPI0_1
13+
; CHECK-NEXT: dup v0.2d, x8
14+
; CHECK-NEXT: dup v1.2d, x10
15+
; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_0]
16+
; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI0_1]
17+
; CHECK-NEXT: add x9, x0, #8
18+
; CHECK-NEXT: mov w10, #16 // =0x10
19+
; CHECK-NEXT: mov w11, #1 // =0x1
20+
; CHECK-NEXT: b .LBB0_2
21+
; CHECK-NEXT: .LBB0_1: // %pred.store.continue18
22+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
23+
; CHECK-NEXT: add v2.2d, v2.2d, v1.2d
24+
; CHECK-NEXT: add v3.2d, v3.2d, v1.2d
25+
; CHECK-NEXT: subs x10, x10, #4
26+
; CHECK-NEXT: add x9, x9, #16
27+
; CHECK-NEXT: b.eq .LBB0_10
28+
; CHECK-NEXT: .LBB0_2: // %vector.body
29+
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
30+
; CHECK-NEXT: cmhi v4.2d, v0.2d, v3.2d
31+
; CHECK-NEXT: xtn v4.2s, v4.2d
32+
; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
33+
; CHECK-NEXT: umov w12, v4.h[0]
34+
; CHECK-NEXT: tbz w12, #0, .LBB0_4
35+
; CHECK-NEXT: // %bb.3: // %pred.store.if
36+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
37+
; CHECK-NEXT: stur w11, [x9, #-8]
38+
; CHECK-NEXT: .LBB0_4: // %pred.store.continue
39+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
40+
; CHECK-NEXT: dup v4.2d, x8
41+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v3.2d
42+
; CHECK-NEXT: xtn v4.2s, v4.2d
43+
; CHECK-NEXT: uzp1 v4.4h, v4.4h, v0.4h
44+
; CHECK-NEXT: umov w12, v4.h[1]
45+
; CHECK-NEXT: tbz w12, #0, .LBB0_6
46+
; CHECK-NEXT: // %bb.5: // %pred.store.if5
47+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
48+
; CHECK-NEXT: stur w11, [x9, #-4]
49+
; CHECK-NEXT: .LBB0_6: // %pred.store.continue6
50+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
51+
; CHECK-NEXT: dup v4.2d, x8
52+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
53+
; CHECK-NEXT: xtn v4.2s, v4.2d
54+
; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
55+
; CHECK-NEXT: umov w12, v4.h[2]
56+
; CHECK-NEXT: tbz w12, #0, .LBB0_8
57+
; CHECK-NEXT: // %bb.7: // %pred.store.if7
58+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
59+
; CHECK-NEXT: str w11, [x9]
60+
; CHECK-NEXT: .LBB0_8: // %pred.store.continue8
61+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
62+
; CHECK-NEXT: dup v4.2d, x8
63+
; CHECK-NEXT: cmhi v4.2d, v4.2d, v2.2d
64+
; CHECK-NEXT: xtn v4.2s, v4.2d
65+
; CHECK-NEXT: uzp1 v4.4h, v0.4h, v4.4h
66+
; CHECK-NEXT: umov w12, v4.h[3]
67+
; CHECK-NEXT: tbz w12, #0, .LBB0_1
68+
; CHECK-NEXT: // %bb.9: // %pred.store.if9
69+
; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1
70+
; CHECK-NEXT: str w11, [x9, #4]
71+
; CHECK-NEXT: b .LBB0_1
72+
; CHECK-NEXT: .LBB0_10: // %for.cond.cleanup
73+
; CHECK-NEXT: ret
74+
entry:
75+
br label %vector.body
76+
77+
vector.body:
78+
; Scalar element index: starts at 0, advanced in pred.store.continue18.
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue18 ]
79+
; Per-lane induction values: start at <0, 1, 2, 3>.
%vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %entry ], [ %vec.ind.next, %pred.store.continue18 ]
80+
; Lane mask: true for each lane whose induction value is unsigned-less-than 15.
%0 = icmp ult <4 x i64> %vec.ind, <i64 15, i64 15, i64 15, i64 15>
81+
; Lane 0 of the mask decides whether dest[index] is written.
%1 = extractelement <4 x i1> %0, i64 0
82+
br i1 %1, label %pred.store.if, label %pred.store.continue
83+
84+
pred.store.if:
85+
%2 = getelementptr inbounds i32, ptr %dest, i64 %index
86+
store i32 1, ptr %2, align 4
87+
br label %pred.store.continue
88+
89+
pred.store.continue:
90+
; Lane 1 of the mask guards the store to dest[index | 1].
%3 = extractelement <4 x i1> %0, i64 1
91+
br i1 %3, label %pred.store.if5, label %pred.store.continue6
92+
93+
pred.store.if5:
94+
%4 = or disjoint i64 %index, 1
95+
%5 = getelementptr inbounds i32, ptr %dest, i64 %4
96+
store i32 1, ptr %5, align 4
97+
br label %pred.store.continue6
98+
99+
pred.store.continue6:
100+
; Lane 2 of the mask guards the store to dest[index | 2].
%6 = extractelement <4 x i1> %0, i64 2
101+
br i1 %6, label %pred.store.if7, label %pred.store.continue8
102+
103+
pred.store.if7:
104+
%7 = or disjoint i64 %index, 2
105+
%8 = getelementptr inbounds i32, ptr %dest, i64 %7
106+
store i32 1, ptr %8, align 4
107+
br label %pred.store.continue8
108+
109+
pred.store.continue8:
110+
; Lane 3 of the mask guards the store to dest[index | 3].
%9 = extractelement <4 x i1> %0, i64 3
111+
br i1 %9, label %pred.store.if9, label %pred.store.continue18
112+
113+
pred.store.if9:
114+
%10 = or disjoint i64 %index, 3
115+
%11 = getelementptr inbounds i32, ptr %dest, i64 %10
116+
store i32 1, ptr %11, align 4
117+
br label %pred.store.continue18
118+
119+
pred.store.continue18:
120+
; Loop latch: step the scalar index and every induction lane by 4.
%index.next = add i64 %index, 4
121+
%vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
122+
; Exit once the stepped index equals 16.
; NOTE(review): the value number jumps from %11 to %24, so this file needs an
; IR parser that accepts gaps in explicit value numbers - confirm that is
; intended, or renumber to %12.
%24 = icmp eq i64 %index.next, 16
123+
br i1 %24, label %for.cond.cleanup, label %vector.body
124+
125+
for.cond.cleanup:
126+
ret void
127+
}
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 < %s | FileCheck %s
3+
4+
; Test: a 4-wide loop whose <4 x i64> induction vector is compared (icmp ult)
; against a splat of 15. Each lane of the resulting <4 x i1> is extracted in
; turn and guards a scalar "store i32 1" into %dest at index, index|1, index|2
; and index|3. %index advances by 4 and the loop exits once it equals 16.
; The function uses the arm_aapcs_vfpcc calling convention; the target and
; MVE feature set come from the llc invocation at the top of the file.
;
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
;
; In the expected output every lane is tested with scalar code
; (subs #15 / sbcs / cset lo / cmp) and the store is predicated in an IT block.
; NOTE(review): presumably this is the baseline the test was added to capture;
; confirm against the change this test accompanies.
define arm_aapcs_vfpcc void @vector_loop_with_icmp(ptr nocapture noundef writeonly %dest) {
5+
; CHECK-LABEL: vector_loop_with_icmp:
6+
; CHECK: @ %bb.0: @ %entry
7+
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr}
8+
; CHECK-NEXT: adr r1, .LCPI0_0
9+
; CHECK-NEXT: adr r2, .LCPI0_1
10+
; CHECK-NEXT: vldrw.u32 q0, [r1]
11+
; CHECK-NEXT: vldrw.u32 q1, [r2]
12+
; CHECK-NEXT: movs r1, #0
13+
; CHECK-NEXT: mov.w r12, #1
14+
; CHECK-NEXT: mov.w lr, #0
15+
; CHECK-NEXT: .LBB0_1: @ %vector.body
16+
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
17+
; CHECK-NEXT: vmov r2, r3, d0
18+
; CHECK-NEXT: vmov r4, r5, d3
19+
; CHECK-NEXT: vmov r6, r7, d1
20+
; CHECK-NEXT: subs r2, #15
21+
; CHECK-NEXT: sbcs r2, r3, #0
22+
; CHECK-NEXT: cset r2, lo
23+
; CHECK-NEXT: cmp r2, #0
24+
; CHECK-NEXT: vmov r2, r3, d1
25+
; CHECK-NEXT: it ne
26+
; CHECK-NEXT: strne.w r12, [r0, r1, lsl #2]
27+
; CHECK-NEXT: subs r2, #15
28+
; CHECK-NEXT: sbcs r2, r3, #0
29+
; CHECK-NEXT: cset r2, lo
30+
; CHECK-NEXT: cmp r2, #0
31+
; CHECK-NEXT: itt ne
32+
; CHECK-NEXT: orrne r2, r1, #1
33+
; CHECK-NEXT: strne.w r12, [r0, r2, lsl #2]
34+
; CHECK-NEXT: vmov r2, r3, d2
35+
; CHECK-NEXT: subs r2, #15
36+
; CHECK-NEXT: sbcs r2, r3, #0
37+
; CHECK-NEXT: cset r2, lo
38+
; CHECK-NEXT: cmp r2, #0
39+
; CHECK-NEXT: itt ne
40+
; CHECK-NEXT: orrne r2, r1, #2
41+
; CHECK-NEXT: strne.w r12, [r0, r2, lsl #2]
42+
; CHECK-NEXT: vmov r2, r3, d3
43+
; CHECK-NEXT: subs r2, #15
44+
; CHECK-NEXT: sbcs r2, r3, #0
45+
; CHECK-NEXT: cset r2, lo
46+
; CHECK-NEXT: cmp r2, #0
47+
; CHECK-NEXT: itt ne
48+
; CHECK-NEXT: orrne r2, r1, #3
49+
; CHECK-NEXT: strne.w r12, [r0, r2, lsl #2]
50+
; CHECK-NEXT: vmov r2, r3, d2
51+
; CHECK-NEXT: adds r1, #4
52+
; CHECK-NEXT: adc lr, lr, #0
53+
; CHECK-NEXT: adds.w r9, r2, #4
54+
; CHECK-NEXT: adc r8, r3, #0
55+
; CHECK-NEXT: vmov r3, r2, d0
56+
; CHECK-NEXT: adds r4, #4
57+
; CHECK-NEXT: adc r5, r5, #0
58+
; CHECK-NEXT: adds r6, #4
59+
; CHECK-NEXT: adc r7, r7, #0
60+
; CHECK-NEXT: vmov q1[2], q1[0], r9, r4
61+
; CHECK-NEXT: vmov q1[3], q1[1], r8, r5
62+
; CHECK-NEXT: adds r3, #4
63+
; CHECK-NEXT: vmov q0[2], q0[0], r3, r6
64+
; CHECK-NEXT: adc r2, r2, #0
65+
; CHECK-NEXT: vmov q0[3], q0[1], r2, r7
66+
; CHECK-NEXT: eor r2, r1, #16
67+
; CHECK-NEXT: orrs.w r2, r2, lr
68+
; CHECK-NEXT: bne .LBB0_1
69+
; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup
70+
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc}
71+
; CHECK-NEXT: .p2align 4
72+
; CHECK-NEXT: @ %bb.3:
73+
; CHECK-NEXT: .LCPI0_0:
74+
; CHECK-NEXT: .long 0 @ 0x0
75+
; CHECK-NEXT: .long 0 @ 0x0
76+
; CHECK-NEXT: .long 1 @ 0x1
77+
; CHECK-NEXT: .long 0 @ 0x0
78+
; CHECK-NEXT: .LCPI0_1:
79+
; CHECK-NEXT: .long 2 @ 0x2
80+
; CHECK-NEXT: .long 0 @ 0x0
81+
; CHECK-NEXT: .long 3 @ 0x3
82+
; CHECK-NEXT: .long 0 @ 0x0
83+
entry:
84+
br label %vector.body
85+
86+
vector.body:
87+
; Scalar element index: starts at 0, advanced in pred.store.continue18.
%index = phi i64 [ 0, %entry ], [ %index.next, %pred.store.continue18 ]
88+
; Per-lane induction values: start at <0, 1, 2, 3>.
%vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %entry ], [ %vec.ind.next, %pred.store.continue18 ]
89+
; Lane mask: true for each lane whose induction value is unsigned-less-than 15.
%0 = icmp ult <4 x i64> %vec.ind, <i64 15, i64 15, i64 15, i64 15>
90+
; Lane 0 of the mask decides whether dest[index] is written.
%1 = extractelement <4 x i1> %0, i64 0
91+
br i1 %1, label %pred.store.if, label %pred.store.continue
92+
93+
pred.store.if:
94+
%2 = getelementptr inbounds i32, ptr %dest, i64 %index
95+
store i32 1, ptr %2, align 4
96+
br label %pred.store.continue
97+
98+
pred.store.continue:
99+
; Lane 1 of the mask guards the store to dest[index | 1].
%3 = extractelement <4 x i1> %0, i64 1
100+
br i1 %3, label %pred.store.if5, label %pred.store.continue6
101+
102+
pred.store.if5:
103+
%4 = or disjoint i64 %index, 1
104+
%5 = getelementptr inbounds i32, ptr %dest, i64 %4
105+
store i32 1, ptr %5, align 4
106+
br label %pred.store.continue6
107+
108+
pred.store.continue6:
109+
; Lane 2 of the mask guards the store to dest[index | 2].
%6 = extractelement <4 x i1> %0, i64 2
110+
br i1 %6, label %pred.store.if7, label %pred.store.continue8
111+
112+
pred.store.if7:
113+
%7 = or disjoint i64 %index, 2
114+
%8 = getelementptr inbounds i32, ptr %dest, i64 %7
115+
store i32 1, ptr %8, align 4
116+
br label %pred.store.continue8
117+
118+
pred.store.continue8:
119+
; Lane 3 of the mask guards the store to dest[index | 3].
%9 = extractelement <4 x i1> %0, i64 3
120+
br i1 %9, label %pred.store.if9, label %pred.store.continue18
121+
122+
pred.store.if9:
123+
%10 = or disjoint i64 %index, 3
124+
%11 = getelementptr inbounds i32, ptr %dest, i64 %10
125+
store i32 1, ptr %11, align 4
126+
br label %pred.store.continue18
127+
128+
pred.store.continue18:
129+
; Loop latch: step the scalar index and every induction lane by 4.
%index.next = add i64 %index, 4
130+
%vec.ind.next = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
131+
; Exit once the stepped index equals 16.
; NOTE(review): the value number jumps from %11 to %24, so this file needs an
; IR parser that accepts gaps in explicit value numbers - confirm that is
; intended, or renumber to %12.
%24 = icmp eq i64 %index.next, 16
132+
br i1 %24, label %for.cond.cleanup, label %vector.body
133+
134+
for.cond.cleanup:
135+
ret void
136+
}

0 commit comments

Comments
 (0)