Skip to content

Commit 7629679

Browse files
committed
Pass accumulator from function in tests
1 parent 31b7567 commit 7629679

File tree

1 file changed

+30
-38
lines changed

1 file changed

+30
-38
lines changed
Lines changed: 30 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,104 +1,96 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc -mtriple=aarch64 -mattr=+sve2 %s -o - | FileCheck %s
33

4-
define <vscale x 4 x i32> @dotp(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
4+
define <vscale x 4 x i32> @dotp(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
55
; CHECK-LABEL: dotp:
66
; CHECK: // %bb.0: // %entry
7-
; CHECK-NEXT: mov z2.s, #0 // =0x0
8-
; CHECK-NEXT: udot z2.s, z0.b, z1.b
9-
; CHECK-NEXT: mov z0.d, z2.d
7+
; CHECK-NEXT: udot z0.s, z1.b, z2.b
108
; CHECK-NEXT: ret
119
entry:
1210
%a.wide = zext <vscale x 16 x i8> %a to <vscale x 16 x i32>
1311
%b.wide = zext <vscale x 16 x i8> %b to <vscale x 16 x i32>
1412
%mult = mul nuw nsw <vscale x 16 x i32> %a.wide, %b.wide
15-
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %mult)
13+
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
1614
ret <vscale x 4 x i32> %partial.reduce
1715
}
1816

19-
define <vscale x 2 x i64> @dotp_wide(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
17+
define <vscale x 2 x i64> @dotp_wide(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
2018
; CHECK-LABEL: dotp_wide:
2119
; CHECK: // %bb.0: // %entry
22-
; CHECK-NEXT: mov z2.d, #0 // =0x0
23-
; CHECK-NEXT: udot z2.d, z0.h, z1.h
24-
; CHECK-NEXT: mov z0.d, z2.d
20+
; CHECK-NEXT: udot z0.d, z1.h, z2.h
2521
; CHECK-NEXT: ret
2622
entry:
2723
%a.wide = zext <vscale x 8 x i16> %a to <vscale x 8 x i64>
2824
%b.wide = zext <vscale x 8 x i16> %b to <vscale x 8 x i64>
2925
%mult = mul nuw nsw <vscale x 8 x i64> %a.wide, %b.wide
30-
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> zeroinitializer, <vscale x 8 x i64> %mult)
26+
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
3127
ret <vscale x 2 x i64> %partial.reduce
3228
}
3329

34-
define <vscale x 4 x i32> @dotp_sext(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
30+
define <vscale x 4 x i32> @dotp_sext(<vscale x 4 x i32> %acc, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
3531
; CHECK-LABEL: dotp_sext:
3632
; CHECK: // %bb.0: // %entry
37-
; CHECK-NEXT: mov z2.s, #0 // =0x0
38-
; CHECK-NEXT: sdot z2.s, z0.b, z1.b
39-
; CHECK-NEXT: mov z0.d, z2.d
33+
; CHECK-NEXT: sdot z0.s, z1.b, z2.b
4034
; CHECK-NEXT: ret
4135
entry:
4236
%a.wide = sext <vscale x 16 x i8> %a to <vscale x 16 x i32>
4337
%b.wide = sext <vscale x 16 x i8> %b to <vscale x 16 x i32>
4438
%mult = mul nuw nsw <vscale x 16 x i32> %a.wide, %b.wide
45-
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 16 x i32> %mult)
39+
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> %acc, <vscale x 16 x i32> %mult)
4640
ret <vscale x 4 x i32> %partial.reduce
4741
}
4842

49-
define <vscale x 2 x i64> @dotp_wide_sext(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
43+
define <vscale x 2 x i64> @dotp_wide_sext(<vscale x 2 x i64> %acc, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
5044
; CHECK-LABEL: dotp_wide_sext:
5145
; CHECK: // %bb.0: // %entry
52-
; CHECK-NEXT: mov z2.d, #0 // =0x0
53-
; CHECK-NEXT: sdot z2.d, z0.h, z1.h
54-
; CHECK-NEXT: mov z0.d, z2.d
46+
; CHECK-NEXT: sdot z0.d, z1.h, z2.h
5547
; CHECK-NEXT: ret
5648
entry:
5749
%a.wide = sext <vscale x 8 x i16> %a to <vscale x 8 x i64>
5850
%b.wide = sext <vscale x 8 x i16> %b to <vscale x 8 x i64>
5951
%mult = mul nuw nsw <vscale x 8 x i64> %a.wide, %b.wide
60-
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> zeroinitializer, <vscale x 8 x i64> %mult)
52+
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv8i64(<vscale x 2 x i64> %acc, <vscale x 8 x i64> %mult)
6153
ret <vscale x 2 x i64> %partial.reduce
6254
}
6355

64-
define <vscale x 4 x i32> @not_dotp(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
56+
define <vscale x 4 x i32> @not_dotp(<vscale x 4 x i32> %acc, <vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
6557
; CHECK-LABEL: not_dotp:
6658
; CHECK: // %bb.0: // %entry
67-
; CHECK-NEXT: and z0.h, z0.h, #0xff
6859
; CHECK-NEXT: and z1.h, z1.h, #0xff
60+
; CHECK-NEXT: and z2.h, z2.h, #0xff
6961
; CHECK-NEXT: ptrue p0.s
70-
; CHECK-NEXT: uunpkhi z2.s, z0.h
71-
; CHECK-NEXT: uunpkhi z3.s, z1.h
72-
; CHECK-NEXT: uunpklo z0.s, z0.h
73-
; CHECK-NEXT: uunpklo z1.s, z1.h
74-
; CHECK-NEXT: mul z2.s, z2.s, z3.s
75-
; CHECK-NEXT: mad z0.s, p0/m, z1.s, z2.s
62+
; CHECK-NEXT: uunpklo z3.s, z1.h
63+
; CHECK-NEXT: uunpklo z4.s, z2.h
64+
; CHECK-NEXT: uunpkhi z1.s, z1.h
65+
; CHECK-NEXT: uunpkhi z2.s, z2.h
66+
; CHECK-NEXT: mla z0.s, p0/m, z3.s, z4.s
67+
; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s
7668
; CHECK-NEXT: ret
7769
entry:
7870
%a.wide = zext <vscale x 8 x i8> %a to <vscale x 8 x i32>
7971
%b.wide = zext <vscale x 8 x i8> %b to <vscale x 8 x i32>
8072
%mult = mul nuw nsw <vscale x 8 x i32> %a.wide, %b.wide
81-
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> zeroinitializer, <vscale x 8 x i32> %mult)
73+
%partial.reduce = tail call <vscale x 4 x i32> @llvm.experimental.vector.partial.reduce.add.nxv4i32.nxv8i32(<vscale x 4 x i32> %acc, <vscale x 8 x i32> %mult)
8274
ret <vscale x 4 x i32> %partial.reduce
8375
}
8476

85-
define <vscale x 2 x i64> @not_dotp_wide(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
77+
define <vscale x 2 x i64> @not_dotp_wide(<vscale x 2 x i64> %acc, <vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
8678
; CHECK-LABEL: not_dotp_wide:
8779
; CHECK: // %bb.0: // %entry
88-
; CHECK-NEXT: and z0.s, z0.s, #0xffff
8980
; CHECK-NEXT: and z1.s, z1.s, #0xffff
81+
; CHECK-NEXT: and z2.s, z2.s, #0xffff
9082
; CHECK-NEXT: ptrue p0.d
91-
; CHECK-NEXT: uunpkhi z2.d, z0.s
92-
; CHECK-NEXT: uunpkhi z3.d, z1.s
93-
; CHECK-NEXT: uunpklo z0.d, z0.s
94-
; CHECK-NEXT: uunpklo z1.d, z1.s
95-
; CHECK-NEXT: mul z2.d, z2.d, z3.d
96-
; CHECK-NEXT: mad z0.d, p0/m, z1.d, z2.d
83+
; CHECK-NEXT: uunpklo z3.d, z1.s
84+
; CHECK-NEXT: uunpklo z4.d, z2.s
85+
; CHECK-NEXT: uunpkhi z1.d, z1.s
86+
; CHECK-NEXT: uunpkhi z2.d, z2.s
87+
; CHECK-NEXT: mla z0.d, p0/m, z3.d, z4.d
88+
; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
9789
; CHECK-NEXT: ret
9890
entry:
9991
%a.wide = zext <vscale x 4 x i16> %a to <vscale x 4 x i64>
10092
%b.wide = zext <vscale x 4 x i16> %b to <vscale x 4 x i64>
10193
%mult = mul nuw nsw <vscale x 4 x i64> %a.wide, %b.wide
102-
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> zeroinitializer, <vscale x 4 x i64> %mult)
94+
%partial.reduce = tail call <vscale x 2 x i64> @llvm.experimental.vector.partial.reduce.add.nxv2i64.nxv4i64(<vscale x 2 x i64> %acc, <vscale x 4 x i64> %mult)
10395
ret <vscale x 2 x i64> %partial.reduce
10496
}

0 commit comments

Comments
 (0)