Skip to content

Commit 199cbec

Browse files
committed
[RISCV] Don't try to form VECREDUCE without vector instructions
This fixes a bug introduced in f0505c that went unnoticed until 7a0b9da landed. The bug caused 7a0b9da to be reverted; that change will be reapplied once this fix is in.
1 parent 6cbf6f5 commit 199cbec

File tree

2 files changed

+108
-0
lines changed

2 files changed

+108
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -11129,6 +11129,11 @@ combineBinOpOfExtractToReduceTree(SDNode *N, SelectionDAG &DAG,
1112911129
if (DAG.NewNodesMustHaveLegalTypes)
1113011130
return SDValue();
1113111131

11132+
// Without V, this transform isn't useful. We could form the (illegal)
11133+
// operations and let them be scalarized again, but there's really no point.
11134+
if (!Subtarget.hasVInstructions())
11135+
return SDValue();
11136+
1113211137
const SDLoc DL(N);
1113311138
const EVT VT = N->getValueType(0);
1113411139
[[maybe_unused]] const unsigned Opc = N->getOpcode();
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
3+
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
4+
5+
; Negative test to ensure we don't try to generate a vector reduce when
6+
; vector instructions are not available.
7+
8+
; reduce_sum_4xi32: extracts all four lanes of %v and sums them with a
; left-to-right chain of scalar adds, ((e0 + e1) + e2) + e3.
; The expected output for both the riscv32 and riscv64 runs is purely scalar:
; four lw loads from offsets of a0 followed by three adds (the final one is
; addw on riscv64); no vector instructions appear.
define i32 @reduce_sum_4xi32(<4 x i32> %v) {
9+
; RV32-LABEL: reduce_sum_4xi32:
10+
; RV32: # %bb.0:
11+
; RV32-NEXT: lw a1, 12(a0)
12+
; RV32-NEXT: lw a2, 4(a0)
13+
; RV32-NEXT: lw a3, 0(a0)
14+
; RV32-NEXT: lw a0, 8(a0)
15+
; RV32-NEXT: add a2, a3, a2
16+
; RV32-NEXT: add a0, a0, a1
17+
; RV32-NEXT: add a0, a2, a0
18+
; RV32-NEXT: ret
19+
;
20+
; RV64-LABEL: reduce_sum_4xi32:
21+
; RV64: # %bb.0:
22+
; RV64-NEXT: lw a1, 24(a0)
23+
; RV64-NEXT: lw a2, 8(a0)
24+
; RV64-NEXT: lw a3, 0(a0)
25+
; RV64-NEXT: lw a0, 16(a0)
26+
; RV64-NEXT: add a2, a3, a2
27+
; RV64-NEXT: add a0, a0, a1
28+
; RV64-NEXT: addw a0, a2, a0
29+
; RV64-NEXT: ret
30+
%e0 = extractelement <4 x i32> %v, i32 0
31+
%e1 = extractelement <4 x i32> %v, i32 1
32+
%e2 = extractelement <4 x i32> %v, i32 2
33+
%e3 = extractelement <4 x i32> %v, i32 3
34+
%add0 = add i32 %e0, %e1
35+
%add1 = add i32 %add0, %e2
36+
%add2 = add i32 %add1, %e3
37+
ret i32 %add2
38+
}
39+
40+
; reduce_xor_4xi32: extracts all four lanes of %v and combines them with a
; left-to-right chain of scalar xors, ((e0 ^ e1) ^ e2) ^ e3.
; The expected output is purely scalar on both targets: four loads from
; offsets of a0 (lw on riscv32, ld on riscv64) followed by three xor
; instructions; no vector instructions appear.
define i32 @reduce_xor_4xi32(<4 x i32> %v) {
41+
; RV32-LABEL: reduce_xor_4xi32:
42+
; RV32: # %bb.0:
43+
; RV32-NEXT: lw a1, 12(a0)
44+
; RV32-NEXT: lw a2, 4(a0)
45+
; RV32-NEXT: lw a3, 0(a0)
46+
; RV32-NEXT: lw a0, 8(a0)
47+
; RV32-NEXT: xor a2, a3, a2
48+
; RV32-NEXT: xor a0, a0, a1
49+
; RV32-NEXT: xor a0, a2, a0
50+
; RV32-NEXT: ret
51+
;
52+
; RV64-LABEL: reduce_xor_4xi32:
53+
; RV64: # %bb.0:
54+
; RV64-NEXT: ld a1, 24(a0)
55+
; RV64-NEXT: ld a2, 8(a0)
56+
; RV64-NEXT: ld a3, 0(a0)
57+
; RV64-NEXT: ld a0, 16(a0)
58+
; RV64-NEXT: xor a2, a3, a2
59+
; RV64-NEXT: xor a0, a0, a1
60+
; RV64-NEXT: xor a0, a2, a0
61+
; RV64-NEXT: ret
62+
%e0 = extractelement <4 x i32> %v, i32 0
63+
%e1 = extractelement <4 x i32> %v, i32 1
64+
%e2 = extractelement <4 x i32> %v, i32 2
65+
%e3 = extractelement <4 x i32> %v, i32 3
66+
%xor0 = xor i32 %e0, %e1
67+
%xor1 = xor i32 %xor0, %e2
68+
%xor2 = xor i32 %xor1, %e3
69+
ret i32 %xor2
70+
}
71+
72+
; reduce_or_4xi32: extracts all four lanes of %v and combines them with a
; left-to-right chain of scalar ors, ((e0 | e1) | e2) | e3.
; The expected output is purely scalar on both targets: four loads from
; offsets of a0 (lw on riscv32, ld on riscv64) followed by three or
; instructions; no vector instructions appear.
define i32 @reduce_or_4xi32(<4 x i32> %v) {
73+
; RV32-LABEL: reduce_or_4xi32:
74+
; RV32: # %bb.0:
75+
; RV32-NEXT: lw a1, 12(a0)
76+
; RV32-NEXT: lw a2, 4(a0)
77+
; RV32-NEXT: lw a3, 0(a0)
78+
; RV32-NEXT: lw a0, 8(a0)
79+
; RV32-NEXT: or a2, a3, a2
80+
; RV32-NEXT: or a0, a0, a1
81+
; RV32-NEXT: or a0, a2, a0
82+
; RV32-NEXT: ret
83+
;
84+
; RV64-LABEL: reduce_or_4xi32:
85+
; RV64: # %bb.0:
86+
; RV64-NEXT: ld a1, 24(a0)
87+
; RV64-NEXT: ld a2, 8(a0)
88+
; RV64-NEXT: ld a3, 0(a0)
89+
; RV64-NEXT: ld a0, 16(a0)
90+
; RV64-NEXT: or a2, a3, a2
91+
; RV64-NEXT: or a0, a0, a1
92+
; RV64-NEXT: or a0, a2, a0
93+
; RV64-NEXT: ret
94+
%e0 = extractelement <4 x i32> %v, i32 0
95+
%e1 = extractelement <4 x i32> %v, i32 1
96+
%e2 = extractelement <4 x i32> %v, i32 2
97+
%e3 = extractelement <4 x i32> %v, i32 3
98+
%or0 = or i32 %e0, %e1
99+
%or1 = or i32 %or0, %e2
100+
%or2 = or i32 %or1, %e3
101+
ret i32 %or2
102+
}
103+

0 commit comments

Comments
 (0)