Skip to content

Commit 594bfad

Browse files
authored
[LoongArch] Pre-commit for broadcast load (#136070)
1 parent a354564 commit 594bfad

File tree

2 files changed

+368
-0
lines changed

2 files changed

+368
-0
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 -mattr=+lasx < %s | FileCheck %s
3+
4+
; TODO: Loading an element and splatting it to a vector could be lowered to xvldrepl
5+
6+
; A load that has more than one user shouldn't be lowered to xvldrepl
7+
; Negative case: %tmp is used by both the store to %dst and the splat, so the
; scalar load has two users. The assertions pin a plain ld.d, an
; xvreplgr2vr.d of the loaded GPR, and an st.d of the same GPR.
define <4 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst) {
8+
; CHECK-LABEL: should_not_be_optimized:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: ld.d $a0, $a0, 0
11+
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
12+
; CHECK-NEXT: st.d $a0, $a1, 0
13+
; CHECK-NEXT: ret
14+
%tmp = load i64, ptr %ptr
15+
store i64 %tmp, ptr %dst
16+
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
17+
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
18+
ret <4 x i64> %tmp2
19+
}
20+
21+
; Splat of an i64 loaded from %ptr + 4: the GEP steps by one i32 (4 bytes), so
; the byte offset is not a multiple of the 8-byte element size.
; NOTE(review): presumably such an offset cannot be folded into an xvldrepl.d
; immediate -- confirm against the LoongArch ISA manual.
define <4 x i64> @xvldrepl_d_unaligned_offset(ptr %ptr) {
22+
; CHECK-LABEL: xvldrepl_d_unaligned_offset:
23+
; CHECK: # %bb.0:
24+
; CHECK-NEXT: ld.d $a0, $a0, 4
25+
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
26+
; CHECK-NEXT: ret
27+
%p = getelementptr i32, ptr %ptr, i32 1
28+
%tmp = load i64, ptr %p
29+
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
30+
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
31+
ret <4 x i64> %tmp2
32+
}
33+
34+
; Splat of an i8 loaded directly from %ptr into all 32 lanes of <32 x i8>.
; Baseline codegen is ld.b followed by xvreplgr2vr.b.
define <32 x i8> @xvldrepl_b(ptr %ptr) {
35+
; CHECK-LABEL: xvldrepl_b:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: ld.b $a0, $a0, 0
38+
; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
39+
; CHECK-NEXT: ret
40+
%tmp = load i8, ptr %ptr
41+
%tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
42+
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> poison, <32 x i32> zeroinitializer
43+
ret <32 x i8> %tmp2
44+
}
45+
46+
; Same as the i8 splat, but the element is loaded from %ptr + 33 bytes
; (GEP of 33 i8 elements); the offset is folded into the ld.b immediate.
define <32 x i8> @xvldrepl_b_offset(ptr %ptr) {
47+
; CHECK-LABEL: xvldrepl_b_offset:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: ld.b $a0, $a0, 33
50+
; CHECK-NEXT: xvreplgr2vr.b $xr0, $a0
51+
; CHECK-NEXT: ret
52+
%p = getelementptr i8, ptr %ptr, i64 33
53+
%tmp = load i8, ptr %p
54+
%tmp1 = insertelement <32 x i8> zeroinitializer, i8 %tmp, i32 0
55+
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> poison, <32 x i32> zeroinitializer
56+
ret <32 x i8> %tmp2
57+
}
58+
59+
60+
; Splat of an i16 loaded directly from %ptr into all 16 lanes of <16 x i16>.
; Baseline codegen is ld.h followed by xvreplgr2vr.h.
define <16 x i16> @xvldrepl_h(ptr %ptr) {
61+
; CHECK-LABEL: xvldrepl_h:
62+
; CHECK: # %bb.0:
63+
; CHECK-NEXT: ld.h $a0, $a0, 0
64+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
65+
; CHECK-NEXT: ret
66+
%tmp = load i16, ptr %ptr
67+
%tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
68+
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> poison, <16 x i32> zeroinitializer
69+
ret <16 x i16> %tmp2
70+
}
71+
72+
; i16 splat with the element loaded from index 33 of an i16 array
; (33 * 2 = 66 bytes); the offset is folded into the ld.h immediate.
define <16 x i16> @xvldrepl_h_offset(ptr %ptr) {
73+
; CHECK-LABEL: xvldrepl_h_offset:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: ld.h $a0, $a0, 66
76+
; CHECK-NEXT: xvreplgr2vr.h $xr0, $a0
77+
; CHECK-NEXT: ret
78+
%p = getelementptr i16, ptr %ptr, i64 33
79+
%tmp = load i16, ptr %p
80+
%tmp1 = insertelement <16 x i16> zeroinitializer, i16 %tmp, i32 0
81+
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> poison, <16 x i32> zeroinitializer
82+
ret <16 x i16> %tmp2
83+
}
84+
85+
; Splat of an i32 loaded directly from %ptr into all 8 lanes of <8 x i32>.
; Baseline codegen is ld.w followed by xvreplgr2vr.w.
define <8 x i32> @xvldrepl_w(ptr %ptr) {
86+
; CHECK-LABEL: xvldrepl_w:
87+
; CHECK: # %bb.0:
88+
; CHECK-NEXT: ld.w $a0, $a0, 0
89+
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
90+
; CHECK-NEXT: ret
91+
%tmp = load i32, ptr %ptr
92+
%tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
93+
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> poison, <8 x i32> zeroinitializer
94+
ret <8 x i32> %tmp2
95+
}
96+
97+
; i32 splat with the element loaded from index 33 of an i32 array
; (33 * 4 = 132 bytes); the offset is folded into the ld.w immediate.
define <8 x i32> @xvldrepl_w_offset(ptr %ptr) {
98+
; CHECK-LABEL: xvldrepl_w_offset:
99+
; CHECK: # %bb.0:
100+
; CHECK-NEXT: ld.w $a0, $a0, 132
101+
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
102+
; CHECK-NEXT: ret
103+
%p = getelementptr i32, ptr %ptr, i64 33
104+
%tmp = load i32, ptr %p
105+
%tmp1 = insertelement <8 x i32> zeroinitializer, i32 %tmp, i32 0
106+
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> poison, <8 x i32> zeroinitializer
107+
ret <8 x i32> %tmp2
108+
}
109+
110+
111+
; Splat of an i64 loaded directly from %ptr into all 4 lanes of <4 x i64>.
; Baseline codegen is ld.d followed by xvreplgr2vr.d.
define <4 x i64> @xvldrepl_d(ptr %ptr) {
112+
; CHECK-LABEL: xvldrepl_d:
113+
; CHECK: # %bb.0:
114+
; CHECK-NEXT: ld.d $a0, $a0, 0
115+
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
116+
; CHECK-NEXT: ret
117+
%tmp = load i64, ptr %ptr
118+
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
119+
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
120+
ret <4 x i64> %tmp2
121+
}
122+
123+
; i64 splat with the element loaded from index 33 of an i64 array
; (33 * 8 = 264 bytes); the offset is folded into the ld.d immediate.
define <4 x i64> @xvldrepl_d_offset(ptr %ptr) {
124+
; CHECK-LABEL: xvldrepl_d_offset:
125+
; CHECK: # %bb.0:
126+
; CHECK-NEXT: ld.d $a0, $a0, 264
127+
; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
128+
; CHECK-NEXT: ret
129+
%p = getelementptr i64, ptr %ptr, i64 33
130+
%tmp = load i64, ptr %p
131+
%tmp1 = insertelement <4 x i64> zeroinitializer, i64 %tmp, i32 0
132+
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> zeroinitializer
133+
ret <4 x i64> %tmp2
134+
}
135+
136+
; Splat of a float loaded directly from %ptr into all 8 lanes of <8 x float>.
; The scalar goes through an FP register: fld.s into $fa0, then xvreplve0.w.
; NOTE(review): the other functions in this LASX test use an "xvldrepl_"
; prefix; this name lacks the "x" -- consider renaming for consistency.
define <8 x float> @vldrepl_w_flt(ptr %ptr) {
137+
; CHECK-LABEL: vldrepl_w_flt:
138+
; CHECK: # %bb.0:
139+
; CHECK-NEXT: fld.s $fa0, $a0, 0
140+
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
141+
; CHECK-NEXT: ret
142+
%tmp = load float, ptr %ptr
143+
%tmp1 = insertelement <8 x float> zeroinitializer, float %tmp, i32 0
144+
%tmp2 = shufflevector <8 x float> %tmp1, <8 x float> poison, <8 x i32> zeroinitializer
145+
ret <8 x float> %tmp2
146+
}
147+
148+
; Float splat with the element loaded from %ptr + 264 bytes. The address is
; formed with an i64-typed GEP (33 * 8 = 264) even though the loaded element
; is a 4-byte float; the offset is folded into the fld.s immediate.
; NOTE(review): the other functions in this LASX test use an "xvldrepl_"
; prefix; this name lacks the "x" -- consider renaming for consistency.
define <8 x float> @vldrepl_w_flt_offset(ptr %ptr) {
149+
; CHECK-LABEL: vldrepl_w_flt_offset:
150+
; CHECK: # %bb.0:
151+
; CHECK-NEXT: fld.s $fa0, $a0, 264
152+
; CHECK-NEXT: xvreplve0.w $xr0, $xr0
153+
; CHECK-NEXT: ret
154+
%p = getelementptr i64, ptr %ptr, i64 33
155+
%tmp = load float, ptr %p
156+
%tmp1 = insertelement <8 x float> zeroinitializer, float %tmp, i32 0
157+
%tmp2 = shufflevector <8 x float> %tmp1, <8 x float> poison, <8 x i32> zeroinitializer
158+
ret <8 x float> %tmp2
159+
}
160+
161+
; Splat of a double loaded directly from %ptr into all 4 lanes of <4 x double>.
; The scalar goes through an FP register: fld.d into $fa0, then xvreplve0.d.
; NOTE(review): the other functions in this LASX test use an "xvldrepl_"
; prefix; this name lacks the "x" -- consider renaming for consistency.
define <4 x double> @vldrepl_d_dbl(ptr %ptr) {
162+
; CHECK-LABEL: vldrepl_d_dbl:
163+
; CHECK: # %bb.0:
164+
; CHECK-NEXT: fld.d $fa0, $a0, 0
165+
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
166+
; CHECK-NEXT: ret
167+
%tmp = load double, ptr %ptr
168+
%tmp1 = insertelement <4 x double> zeroinitializer, double %tmp, i32 0
169+
%tmp2 = shufflevector <4 x double> %tmp1, <4 x double> poison, <4 x i32> zeroinitializer
170+
ret <4 x double> %tmp2
171+
}
172+
173+
; Double splat with the element loaded from index 33 of an 8-byte-element
; array (33 * 8 = 264 bytes); the offset is folded into the fld.d immediate.
; NOTE(review): the other functions in this LASX test use an "xvldrepl_"
; prefix; this name lacks the "x" -- consider renaming for consistency.
define <4 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
174+
; CHECK-LABEL: vldrepl_d_dbl_offset:
175+
; CHECK: # %bb.0:
176+
; CHECK-NEXT: fld.d $fa0, $a0, 264
177+
; CHECK-NEXT: xvreplve0.d $xr0, $xr0
178+
; CHECK-NEXT: ret
179+
%p = getelementptr i64, ptr %ptr, i64 33
180+
%tmp = load double, ptr %p
181+
%tmp1 = insertelement <4 x double> zeroinitializer, double %tmp, i32 0
182+
%tmp2 = shufflevector <4 x double> %tmp1, <4 x double> poison, <4 x i32> zeroinitializer
183+
ret <4 x double> %tmp2
184+
}
185+
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc --mtriple=loongarch64 -mattr=+lsx < %s | FileCheck %s
3+
4+
; TODO: Loading an element and splatting it to a vector could be lowered to vldrepl
5+
6+
; A load that has more than one user shouldn't be lowered to vldrepl
7+
; Negative case: %tmp is used by both the store to %dst and the splat, so the
; scalar load has two users. The assertions pin a plain ld.d, a
; vreplgr2vr.d of the loaded GPR, and an st.d of the same GPR.
define <2 x i64> @should_not_be_optimized(ptr %ptr, ptr %dst){
8+
; CHECK-LABEL: should_not_be_optimized:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: ld.d $a0, $a0, 0
11+
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
12+
; CHECK-NEXT: st.d $a0, $a1, 0
13+
; CHECK-NEXT: ret
14+
%tmp = load i64, ptr %ptr
15+
store i64 %tmp, ptr %dst
16+
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
17+
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
18+
ret <2 x i64> %tmp2
19+
}
20+
21+
; Splat of an i64 loaded from %ptr + 4: the GEP steps by one i32 (4 bytes), so
; the byte offset is not a multiple of the 8-byte element size.
; NOTE(review): presumably such an offset cannot be folded into a vldrepl.d
; immediate -- confirm against the LoongArch ISA manual.
define <2 x i64> @vldrepl_d_unaligned_offset(ptr %ptr) {
22+
; CHECK-LABEL: vldrepl_d_unaligned_offset:
23+
; CHECK: # %bb.0:
24+
; CHECK-NEXT: ld.d $a0, $a0, 4
25+
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
26+
; CHECK-NEXT: ret
27+
%p = getelementptr i32, ptr %ptr, i32 1
28+
%tmp = load i64, ptr %p
29+
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
30+
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
31+
ret <2 x i64> %tmp2
32+
}
33+
34+
; Splat of an i8 loaded directly from %ptr into all 16 lanes of <16 x i8>.
; Baseline codegen is ld.b followed by vreplgr2vr.b.
define <16 x i8> @vldrepl_b(ptr %ptr) {
35+
; CHECK-LABEL: vldrepl_b:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: ld.b $a0, $a0, 0
38+
; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
39+
; CHECK-NEXT: ret
40+
%tmp = load i8, ptr %ptr
41+
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
42+
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> poison, <16 x i32> zeroinitializer
43+
ret <16 x i8> %tmp2
44+
}
45+
46+
; Same as the i8 splat, but the element is loaded from %ptr + 33 bytes
; (GEP of 33 i8 elements); the offset is folded into the ld.b immediate.
define <16 x i8> @vldrepl_b_offset(ptr %ptr) {
47+
; CHECK-LABEL: vldrepl_b_offset:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: ld.b $a0, $a0, 33
50+
; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
51+
; CHECK-NEXT: ret
52+
%p = getelementptr i8, ptr %ptr, i64 33
53+
%tmp = load i8, ptr %p
54+
%tmp1 = insertelement <16 x i8> zeroinitializer, i8 %tmp, i32 0
55+
%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> poison, <16 x i32> zeroinitializer
56+
ret <16 x i8> %tmp2
57+
}
58+
59+
60+
; Splat of an i16 loaded directly from %ptr into all 8 lanes of <8 x i16>.
; Baseline codegen is ld.h followed by vreplgr2vr.h.
define <8 x i16> @vldrepl_h(ptr %ptr) {
61+
; CHECK-LABEL: vldrepl_h:
62+
; CHECK: # %bb.0:
63+
; CHECK-NEXT: ld.h $a0, $a0, 0
64+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
65+
; CHECK-NEXT: ret
66+
%tmp = load i16, ptr %ptr
67+
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
68+
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> poison, <8 x i32> zeroinitializer
69+
ret <8 x i16> %tmp2
70+
}
71+
72+
; i16 splat with the element loaded from index 33 of an i16 array
; (33 * 2 = 66 bytes); the offset is folded into the ld.h immediate.
define <8 x i16> @vldrepl_h_offset(ptr %ptr) {
73+
; CHECK-LABEL: vldrepl_h_offset:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: ld.h $a0, $a0, 66
76+
; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
77+
; CHECK-NEXT: ret
78+
%p = getelementptr i16, ptr %ptr, i64 33
79+
%tmp = load i16, ptr %p
80+
%tmp1 = insertelement <8 x i16> zeroinitializer, i16 %tmp, i32 0
81+
%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> poison, <8 x i32> zeroinitializer
82+
ret <8 x i16> %tmp2
83+
}
84+
85+
; Splat of an i32 loaded directly from %ptr into all 4 lanes of <4 x i32>.
; Baseline codegen is ld.w followed by vreplgr2vr.w.
define <4 x i32> @vldrepl_w(ptr %ptr) {
86+
; CHECK-LABEL: vldrepl_w:
87+
; CHECK: # %bb.0:
88+
; CHECK-NEXT: ld.w $a0, $a0, 0
89+
; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
90+
; CHECK-NEXT: ret
91+
%tmp = load i32, ptr %ptr
92+
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
93+
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> zeroinitializer
94+
ret <4 x i32> %tmp2
95+
}
96+
97+
; i32 splat with the element loaded from index 33 of an i32 array
; (33 * 4 = 132 bytes); the offset is folded into the ld.w immediate.
define <4 x i32> @vldrepl_w_offset(ptr %ptr) {
98+
; CHECK-LABEL: vldrepl_w_offset:
99+
; CHECK: # %bb.0:
100+
; CHECK-NEXT: ld.w $a0, $a0, 132
101+
; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
102+
; CHECK-NEXT: ret
103+
%p = getelementptr i32, ptr %ptr, i64 33
104+
%tmp = load i32, ptr %p
105+
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
106+
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> zeroinitializer
107+
ret <4 x i32> %tmp2
108+
}
109+
110+
; Splat of an i64 loaded directly from %ptr into both lanes of <2 x i64>.
; Baseline codegen is ld.d followed by vreplgr2vr.d.
define <2 x i64> @vldrepl_d(ptr %ptr) {
111+
; CHECK-LABEL: vldrepl_d:
112+
; CHECK: # %bb.0:
113+
; CHECK-NEXT: ld.d $a0, $a0, 0
114+
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
115+
; CHECK-NEXT: ret
116+
%tmp = load i64, ptr %ptr
117+
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
118+
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
119+
ret <2 x i64> %tmp2
120+
}
121+
122+
; i64 splat with the element loaded from index 33 of an i64 array
; (33 * 8 = 264 bytes); the offset is folded into the ld.d immediate.
define <2 x i64> @vldrepl_d_offset(ptr %ptr) {
123+
; CHECK-LABEL: vldrepl_d_offset:
124+
; CHECK: # %bb.0:
125+
; CHECK-NEXT: ld.d $a0, $a0, 264
126+
; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
127+
; CHECK-NEXT: ret
128+
%p = getelementptr i64, ptr %ptr, i64 33
129+
%tmp = load i64, ptr %p
130+
%tmp1 = insertelement <2 x i64> zeroinitializer, i64 %tmp, i32 0
131+
%tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> poison, <2 x i32> zeroinitializer
132+
ret <2 x i64> %tmp2
133+
}
134+
135+
; Splat of a float loaded directly from %ptr into all 4 lanes of <4 x float>.
; The scalar goes through an FP register: fld.s into $fa0, then vreplvei.w
; broadcasting element 0.
define <4 x float> @vldrepl_w_flt(ptr %ptr) {
136+
; CHECK-LABEL: vldrepl_w_flt:
137+
; CHECK: # %bb.0:
138+
; CHECK-NEXT: fld.s $fa0, $a0, 0
139+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
140+
; CHECK-NEXT: ret
141+
%tmp = load float, ptr %ptr
142+
%tmp1 = insertelement <4 x float> zeroinitializer, float %tmp, i32 0
143+
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> poison, <4 x i32> zeroinitializer
144+
ret <4 x float> %tmp2
145+
}
146+
147+
; Float splat with the element loaded from %ptr + 264 bytes. The address is
; formed with an i64-typed GEP (33 * 8 = 264) even though the loaded element
; is a 4-byte float; the offset is folded into the fld.s immediate.
define <4 x float> @vldrepl_w_flt_offset(ptr %ptr) {
148+
; CHECK-LABEL: vldrepl_w_flt_offset:
149+
; CHECK: # %bb.0:
150+
; CHECK-NEXT: fld.s $fa0, $a0, 264
151+
; CHECK-NEXT: vreplvei.w $vr0, $vr0, 0
152+
; CHECK-NEXT: ret
153+
%p = getelementptr i64, ptr %ptr, i64 33
154+
%tmp = load float, ptr %p
155+
%tmp1 = insertelement <4 x float> zeroinitializer, float %tmp, i32 0
156+
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> poison, <4 x i32> zeroinitializer
157+
ret <4 x float> %tmp2
158+
}
159+
160+
; Splat of a double loaded directly from %ptr into both lanes of <2 x double>.
; The scalar goes through an FP register: fld.d into $fa0, then vreplvei.d
; broadcasting element 0.
define <2 x double> @vldrepl_d_dbl(ptr %ptr) {
161+
; CHECK-LABEL: vldrepl_d_dbl:
162+
; CHECK: # %bb.0:
163+
; CHECK-NEXT: fld.d $fa0, $a0, 0
164+
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
165+
; CHECK-NEXT: ret
166+
%tmp = load double, ptr %ptr
167+
%tmp1 = insertelement <2 x double> zeroinitializer, double %tmp, i32 0
168+
%tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
169+
ret <2 x double> %tmp2
170+
}
171+
172+
; Double splat with the element loaded from index 33 of an 8-byte-element
; array (33 * 8 = 264 bytes); the offset is folded into the fld.d immediate.
define <2 x double> @vldrepl_d_dbl_offset(ptr %ptr) {
173+
; CHECK-LABEL: vldrepl_d_dbl_offset:
174+
; CHECK: # %bb.0:
175+
; CHECK-NEXT: fld.d $fa0, $a0, 264
176+
; CHECK-NEXT: vreplvei.d $vr0, $vr0, 0
177+
; CHECK-NEXT: ret
178+
%p = getelementptr i64, ptr %ptr, i64 33
179+
%tmp = load double, ptr %p
180+
%tmp1 = insertelement <2 x double> zeroinitializer, double %tmp, i32 0
181+
%tmp2 = shufflevector <2 x double> %tmp1, <2 x double> poison, <2 x i32> zeroinitializer
182+
ret <2 x double> %tmp2
183+
}

0 commit comments

Comments
 (0)