Skip to content

Commit 02668f6

Browse files
authored
[RISCV] Match single source deinterleave shuffles for vnsrl (#114878)
We had previously only been matching the two source case where both sources came from a wider source type. We can also match the single source case - provided the result is m4 or smaller because we will need a wider type to represent the source. The main goal of this is to ensure that vnsrl matching is robust to a possible change in canonicalization for length changing shuffles that I'm considering, but it has the nice effect of picking up a few cases we missed along the way.
1 parent c02da38 commit 02668f6

5 files changed

+112
-92
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4426,48 +4426,58 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
44264426
}
44274427

44284428
// Is this a shuffle extracts either the even or odd elements of a vector?
4429-
// That is, specifically, either (a) or (b) below.
4430-
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4431-
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4432-
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4433-
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4434-
// Returns {Src Vector, Even Elements} on success
4435-
static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4436-
SDValue V2, ArrayRef<int> Mask,
4437-
const RISCVSubtarget &Subtarget) {
4429+
// That is, specifically, either (a) or (b) in the options below.
4430+
// Single operand shuffle is easy:
4431+
// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef
4432+
// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef
4433+
// Double operand shuffle:
4434+
// t34: v8i8 = extract_subvector t11, Constant:i64<0>
4435+
// t33: v8i8 = extract_subvector t11, Constant:i64<8>
4436+
// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
4437+
// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
4438+
static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
4439+
SDValue V2, ArrayRef<int> Mask,
4440+
const RISCVSubtarget &Subtarget) {
44384441
// Need to be able to widen the vector.
44394442
if (VT.getScalarSizeInBits() >= Subtarget.getELen())
4440-
return false;
4443+
return SDValue();
4444+
4445+
// First index must be the first even or odd element from V1.
4446+
if (Mask[0] != 0 && Mask[0] != 1)
4447+
return SDValue();
4448+
4449+
// The others must increase by 2 each time.
4450+
for (unsigned i = 1; i != Mask.size(); ++i)
4451+
if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
4452+
return SDValue();
4453+
4454+
if (1 == count_if(Mask, [](int Idx) { return Idx != -1; }))
4455+
return SDValue();
4456+
4457+
if (V2.isUndef() &&
4458+
RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
4459+
return V1;
44414460

44424461
// Both input must be extracts.
44434462
if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
44444463
V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
4445-
return false;
4464+
return SDValue();
44464465

44474466
// Extracting from the same source.
44484467
SDValue Src = V1.getOperand(0);
44494468
if (Src != V2.getOperand(0))
4450-
return false;
4469+
return SDValue();
44514470

44524471
// Src needs to have twice the number of elements.
44534472
if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
4454-
return false;
4473+
return SDValue();
44554474

44564475
// The extracts must extract the two halves of the source.
44574476
if (V1.getConstantOperandVal(1) != 0 ||
44584477
V2.getConstantOperandVal(1) != Mask.size())
4459-
return false;
4460-
4461-
// First index must be the first even or odd element from V1.
4462-
if (Mask[0] != 0 && Mask[0] != 1)
4463-
return false;
4464-
4465-
// The others must increase by 2 each time (or be undef).
4466-
for (unsigned i = 1; i != Mask.size(); ++i)
4467-
if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
4468-
return false;
4478+
return SDValue();
44694479

4470-
return true;
4480+
return Src;
44714481
}
44724482

44734483
/// Is this shuffle interleaving contiguous elements from one vector into the
@@ -4597,7 +4607,8 @@ static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
45974607
assert(Src.getSimpleValueType().isFixedLengthVector());
45984608
ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
45994609

4600-
// The source is a vector of type <m x n*2 x ty>
4610+
// The source is a vector of type <m x n*2 x ty> (For the single source
4611+
// case, the high half is undefined)
46014612
MVT SrcContainerVT =
46024613
MVT::getVectorVT(ContainerVT.getVectorElementType(),
46034614
ContainerVT.getVectorElementCount() * 2);
@@ -5300,10 +5311,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
53005311

53015312
// If this is a deinterleave and we can widen the vector, then we can use
53025313
// vnsrl to deinterleave.
5303-
if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
5304-
return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
5305-
Subtarget, DAG);
5306-
}
5314+
if (SDValue Src =
5315+
isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget))
5316+
return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, Subtarget, DAG);
53075317

53085318
if (SDValue V =
53095319
lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -11,29 +11,28 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
1111
; CHECK: # %bb.0:
1212
; CHECK-NEXT: li a1, 32
1313
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
14-
; CHECK-NEXT: vlm.v v0, (a0)
14+
; CHECK-NEXT: vlm.v v8, (a0)
15+
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
16+
; CHECK-NEXT: vslidedown.vi v0, v8, 2
1517
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
16-
; CHECK-NEXT: vmv.v.i v8, 0
17-
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
18+
; CHECK-NEXT: vmv.v.i v9, 0
19+
; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
20+
; CHECK-NEXT: vmv1r.v v0, v8
21+
; CHECK-NEXT: vmerge.vim v12, v9, 1, v0
22+
; CHECK-NEXT: vnsrl.wi v8, v12, 0
1823
; CHECK-NEXT: vid.v v9
1924
; CHECK-NEXT: vadd.vv v11, v9, v9
20-
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
21-
; CHECK-NEXT: vslidedown.vi v0, v0, 2
22-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
23-
; CHECK-NEXT: vrgather.vv v9, v10, v11
24-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
2525
; CHECK-NEXT: li a0, -256
2626
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2727
; CHECK-NEXT: vmv.s.x v0, a0
2828
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
29-
; CHECK-NEXT: vadd.vi v12, v11, -16
30-
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
31-
; CHECK-NEXT: vmsne.vi v9, v9, 0
32-
; CHECK-NEXT: vadd.vi v12, v11, 1
33-
; CHECK-NEXT: vrgather.vv v13, v10, v12
34-
; CHECK-NEXT: vadd.vi v10, v11, -15
35-
; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t
36-
; CHECK-NEXT: vmsne.vi v8, v13, 0
29+
; CHECK-NEXT: vadd.vi v9, v11, -16
30+
; CHECK-NEXT: vrgather.vv v8, v10, v9, v0.t
31+
; CHECK-NEXT: vmsne.vi v9, v8, 0
32+
; CHECK-NEXT: vnsrl.wi v8, v12, 8
33+
; CHECK-NEXT: vadd.vi v11, v11, -15
34+
; CHECK-NEXT: vrgather.vv v8, v10, v11, v0.t
35+
; CHECK-NEXT: vmsne.vi v8, v8, 0
3736
; CHECK-NEXT: vmv.v.v v0, v9
3837
; CHECK-NEXT: ret
3938
%vec = load <32 x i1>, ptr %p

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -99,45 +99,39 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
9999
; RV32: # %bb.0:
100100
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
101101
; RV32-NEXT: vmv.v.i v12, 1
102-
; RV32-NEXT: vmv.v.i v14, 6
102+
; RV32-NEXT: vmv.v.i v13, 6
103103
; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma
104-
; RV32-NEXT: vslideup.vi v14, v12, 1
105-
; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
106-
; RV32-NEXT: vid.v v12
107-
; RV32-NEXT: vadd.vv v12, v12, v12
108-
; RV32-NEXT: vadd.vi v15, v12, 1
109-
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
110-
; RV32-NEXT: vrgatherei16.vv v12, v8, v15
104+
; RV32-NEXT: vslideup.vi v13, v12, 1
105+
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
106+
; RV32-NEXT: vslidedown.vi v16, v8, 8
107+
; RV32-NEXT: vmv4r.v v20, v8
108+
; RV32-NEXT: li a0, 32
109+
; RV32-NEXT: vmv2r.v v22, v14
111110
; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
112111
; RV32-NEXT: vmv.v.i v0, 10
113-
; RV32-NEXT: vsetivli zero, 8, e32, m4, ta, ma
114-
; RV32-NEXT: vslidedown.vi v8, v8, 8
115112
; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
116-
; RV32-NEXT: vrgatherei16.vv v12, v8, v14, v0.t
117-
; RV32-NEXT: vmv1r.v v8, v12
113+
; RV32-NEXT: vnsrl.wx v8, v20, a0
114+
; RV32-NEXT: vrgatherei16.vv v8, v16, v13, v0.t
118115
; RV32-NEXT: ret
119116
;
120117
; RV64-LABEL: v4i32_v16i32:
121118
; RV64: # %bb.0:
122-
; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
123-
; RV64-NEXT: vid.v v12
124-
; RV64-NEXT: vadd.vv v12, v12, v12
125-
; RV64-NEXT: vadd.vi v14, v12, 1
126-
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
127-
; RV64-NEXT: vrgatherei16.vv v12, v8, v14
128-
; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
129-
; RV64-NEXT: vmv.v.i v0, 10
130119
; RV64-NEXT: vsetivli zero, 8, e32, m4, ta, ma
131-
; RV64-NEXT: vslidedown.vi v8, v8, 8
120+
; RV64-NEXT: vslidedown.vi v16, v8, 8
121+
; RV64-NEXT: vmv4r.v v20, v8
122+
; RV64-NEXT: li a0, 32
123+
; RV64-NEXT: vmv2r.v v22, v12
124+
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
125+
; RV64-NEXT: vnsrl.wx v8, v20, a0
126+
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
127+
; RV64-NEXT: vmv.v.i v0, 10
132128
; RV64-NEXT: li a0, 3
133129
; RV64-NEXT: slli a0, a0, 33
134130
; RV64-NEXT: addi a0, a0, 1
135131
; RV64-NEXT: slli a0, a0, 16
136-
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
137132
; RV64-NEXT: vmv.v.x v10, a0
138133
; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu
139-
; RV64-NEXT: vrgatherei16.vv v12, v8, v10, v0.t
140-
; RV64-NEXT: vmv1r.v v8, v12
134+
; RV64-NEXT: vrgatherei16.vv v8, v16, v10, v0.t
141135
; RV64-NEXT: ret
142136
%2 = shufflevector <16 x i32> %0, <16 x i32> poison, <4 x i32> <i32 1, i32 9, i32 5, i32 14>
143137
ret <4 x i32> %2

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -444,10 +444,8 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
444444
; CHECK: # %bb.0: # %entry
445445
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
446446
; CHECK-NEXT: vle8.v v8, (a0)
447-
; CHECK-NEXT: vid.v v9
448-
; CHECK-NEXT: vadd.vv v9, v9, v9
449-
; CHECK-NEXT: vrgather.vv v10, v8, v9
450-
; CHECK-NEXT: vse8.v v10, (a1)
447+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
448+
; CHECK-NEXT: vse8.v v8, (a1)
451449
; CHECK-NEXT: ret
452450
entry:
453451
%0 = load <8 x i8>, ptr %in, align 1
@@ -461,14 +459,33 @@ define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) {
461459
; CHECK: # %bb.0: # %entry
462460
; CHECK-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
463461
; CHECK-NEXT: vle8.v v8, (a0)
464-
; CHECK-NEXT: vid.v v9
465-
; CHECK-NEXT: vadd.vv v9, v9, v9
466-
; CHECK-NEXT: vrgather.vv v10, v8, v9
467-
; CHECK-NEXT: vse8.v v10, (a1)
462+
; CHECK-NEXT: vnsrl.wi v8, v8, 0
463+
; CHECK-NEXT: vse8.v v8, (a1)
468464
; CHECK-NEXT: ret
469465
entry:
470466
%0 = load <8 x i8>, ptr %in, align 1
471467
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
472468
store <8 x i8> %shuffle.i5, ptr %out, align 1
473469
ret void
474470
}
471+
472+
; Can't match the m8 result type as the source would have to be m16 which
473+
; isn't a legal type.
474+
define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
475+
; CHECK-LABEL: vnsrl_0_i32_single_src_m8:
476+
; CHECK: # %bb.0: # %entry
477+
; CHECK-NEXT: li a2, 64
478+
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
479+
; CHECK-NEXT: vle32.v v8, (a0)
480+
; CHECK-NEXT: vid.v v16
481+
; CHECK-NEXT: vadd.vv v16, v16, v16
482+
; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
483+
; CHECK-NEXT: vrgatherei16.vv v24, v8, v16
484+
; CHECK-NEXT: vse32.v v24, (a1)
485+
; CHECK-NEXT: ret
486+
entry:
487+
%0 = load <64 x i32>, ptr %in, align 4
488+
%shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
489+
store <64 x i32> %shuffle.i5, ptr %out, align 4
490+
ret void
491+
}

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,28 +7,28 @@
77
define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
88
; CHECK-LABEL: vector_deinterleave_v16i1_v32i1:
99
; CHECK: # %bb.0:
10-
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
11-
; CHECK-NEXT: vmv.v.i v8, 0
12-
; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
13-
; CHECK-NEXT: vid.v v9
14-
; CHECK-NEXT: vadd.vv v11, v9, v9
10+
; CHECK-NEXT: vmv1r.v v8, v0
1511
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
1612
; CHECK-NEXT: vslidedown.vi v0, v0, 2
1713
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
18-
; CHECK-NEXT: vrgather.vv v9, v10, v11
19-
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
14+
; CHECK-NEXT: vmv.v.i v9, 0
15+
; CHECK-NEXT: vmerge.vim v10, v9, 1, v0
16+
; CHECK-NEXT: vmv1r.v v0, v8
17+
; CHECK-NEXT: vmerge.vim v12, v9, 1, v0
18+
; CHECK-NEXT: vnsrl.wi v8, v12, 0
19+
; CHECK-NEXT: vid.v v9
20+
; CHECK-NEXT: vadd.vv v11, v9, v9
2021
; CHECK-NEXT: li a0, -256
2122
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
2223
; CHECK-NEXT: vmv.s.x v0, a0
2324
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
24-
; CHECK-NEXT: vadd.vi v12, v11, -16
25-
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
26-
; CHECK-NEXT: vmsne.vi v9, v9, 0
27-
; CHECK-NEXT: vadd.vi v12, v11, 1
28-
; CHECK-NEXT: vrgather.vv v13, v10, v12
29-
; CHECK-NEXT: vadd.vi v10, v11, -15
30-
; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t
31-
; CHECK-NEXT: vmsne.vi v8, v13, 0
25+
; CHECK-NEXT: vadd.vi v9, v11, -16
26+
; CHECK-NEXT: vrgather.vv v8, v10, v9, v0.t
27+
; CHECK-NEXT: vmsne.vi v9, v8, 0
28+
; CHECK-NEXT: vnsrl.wi v8, v12, 8
29+
; CHECK-NEXT: vadd.vi v11, v11, -15
30+
; CHECK-NEXT: vrgather.vv v8, v10, v11, v0.t
31+
; CHECK-NEXT: vmsne.vi v8, v8, 0
3232
; CHECK-NEXT: vmv.v.v v0, v9
3333
; CHECK-NEXT: ret
3434
%retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec)

0 commit comments

Comments
 (0)