Skip to content

Commit 35bf925

Browse files
committed
[RISCV] Delete dead COPYs to vmv0 during vmv0 elimination
This fixes a crash reported at #126850 (comment), where we may leave behind a COPY to vmv0 after peeking through it. Even though the COPY is dead, there is no pass between vmv0 elimination and regalloc that will delete it, so regalloc will try to allocate a register for it. The test showcasing this is added in vmv0-elimination.mir. Removing the dead COPY changes the spills in the >= LMUL 16 VP tests, but it's worth noting that these tests are very noisy and not representative of real-world code.
1 parent 1b17d1e commit 35bf925

29 files changed

+665
-505
lines changed

llvm/lib/Target/RISCV/RISCVVMV0Elimination.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
115115
#endif
116116

117117
bool MadeChange = false;
118+
SmallVector<MachineInstr *> DeadCopies;
118119

119120
// For any instruction with a vmv0 operand, replace it with a copy to v0.
120121
for (MachineBasicBlock &MBB : MF) {
@@ -132,8 +133,12 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
132133
// Peek through a single copy to match what isel does.
133134
if (MachineInstr *SrcMI = MRI.getVRegDef(Src);
134135
SrcMI->isCopy() && SrcMI->getOperand(1).getReg().isVirtual() &&
135-
SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister)
136+
SrcMI->getOperand(1).getSubReg() == RISCV::NoSubRegister) {
137+
// Delete any dead copies to vmv0 to avoid allocating them.
138+
if (MRI.hasOneNonDBGUse(Src))
139+
DeadCopies.push_back(SrcMI);
136140
Src = SrcMI->getOperand(1).getReg();
141+
}
137142

138143
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::COPY), RISCV::V0)
139144
.addReg(Src);
@@ -146,6 +151,9 @@ bool RISCVVMV0Elimination::runOnMachineFunction(MachineFunction &MF) {
146151
}
147152
}
148153

154+
for (MachineInstr *MI : DeadCopies)
155+
MI->eraseFromParent();
156+
149157
if (!MadeChange)
150158
return false;
151159

llvm/test/CodeGen/RISCV/rvv/expandload.ll

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,7 +1145,13 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
11451145
; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t
11461146
; CHECK-RV64-NEXT: addi a0, sp, 16
11471147
; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
1148-
; CHECK-RV64-NEXT: viota.m v8, v7
1148+
; CHECK-RV64-NEXT: viota.m v16, v7
1149+
; CHECK-RV64-NEXT: csrr a0, vlenb
1150+
; CHECK-RV64-NEXT: li a1, 24
1151+
; CHECK-RV64-NEXT: mul a0, a0, a1
1152+
; CHECK-RV64-NEXT: add a0, sp, a0
1153+
; CHECK-RV64-NEXT: addi a0, a0, 16
1154+
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
11491155
; CHECK-RV64-NEXT: vmv1r.v v0, v7
11501156
; CHECK-RV64-NEXT: csrr a0, vlenb
11511157
; CHECK-RV64-NEXT: slli a0, a0, 4
@@ -1157,6 +1163,12 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32>
11571163
; CHECK-RV64-NEXT: add a0, sp, a0
11581164
; CHECK-RV64-NEXT: addi a0, a0, 16
11591165
; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1166+
; CHECK-RV64-NEXT: csrr a0, vlenb
1167+
; CHECK-RV64-NEXT: li a1, 24
1168+
; CHECK-RV64-NEXT: mul a0, a0, a1
1169+
; CHECK-RV64-NEXT: add a0, sp, a0
1170+
; CHECK-RV64-NEXT: addi a0, a0, 16
1171+
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
11601172
; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t
11611173
; CHECK-RV64-NEXT: addi a0, sp, 16
11621174
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1374,7 +1386,13 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
13741386
; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t
13751387
; CHECK-RV32-NEXT: addi a0, sp, 16
13761388
; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
1377-
; CHECK-RV32-NEXT: viota.m v8, v7
1389+
; CHECK-RV32-NEXT: viota.m v16, v7
1390+
; CHECK-RV32-NEXT: csrr a0, vlenb
1391+
; CHECK-RV32-NEXT: li a1, 24
1392+
; CHECK-RV32-NEXT: mul a0, a0, a1
1393+
; CHECK-RV32-NEXT: add a0, sp, a0
1394+
; CHECK-RV32-NEXT: addi a0, a0, 16
1395+
; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
13781396
; CHECK-RV32-NEXT: vmv1r.v v0, v7
13791397
; CHECK-RV32-NEXT: csrr a0, vlenb
13801398
; CHECK-RV32-NEXT: slli a0, a0, 4
@@ -1386,6 +1404,12 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
13861404
; CHECK-RV32-NEXT: add a0, sp, a0
13871405
; CHECK-RV32-NEXT: addi a0, a0, 16
13881406
; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1407+
; CHECK-RV32-NEXT: csrr a0, vlenb
1408+
; CHECK-RV32-NEXT: li a1, 24
1409+
; CHECK-RV32-NEXT: mul a0, a0, a1
1410+
; CHECK-RV32-NEXT: add a0, sp, a0
1411+
; CHECK-RV32-NEXT: addi a0, a0, 16
1412+
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
13891413
; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t
13901414
; CHECK-RV32-NEXT: addi a0, sp, 16
13911415
; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1447,7 +1471,13 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
14471471
; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t
14481472
; CHECK-RV64-NEXT: addi a0, sp, 16
14491473
; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
1450-
; CHECK-RV64-NEXT: viota.m v8, v7
1474+
; CHECK-RV64-NEXT: viota.m v16, v7
1475+
; CHECK-RV64-NEXT: csrr a0, vlenb
1476+
; CHECK-RV64-NEXT: li a1, 24
1477+
; CHECK-RV64-NEXT: mul a0, a0, a1
1478+
; CHECK-RV64-NEXT: add a0, sp, a0
1479+
; CHECK-RV64-NEXT: addi a0, a0, 16
1480+
; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
14511481
; CHECK-RV64-NEXT: vmv1r.v v0, v7
14521482
; CHECK-RV64-NEXT: csrr a0, vlenb
14531483
; CHECK-RV64-NEXT: slli a0, a0, 4
@@ -1459,6 +1489,12 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64>
14591489
; CHECK-RV64-NEXT: add a0, sp, a0
14601490
; CHECK-RV64-NEXT: addi a0, a0, 16
14611491
; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
1492+
; CHECK-RV64-NEXT: csrr a0, vlenb
1493+
; CHECK-RV64-NEXT: li a1, 24
1494+
; CHECK-RV64-NEXT: mul a0, a0, a1
1495+
; CHECK-RV64-NEXT: add a0, sp, a0
1496+
; CHECK-RV64-NEXT: addi a0, a0, 16
1497+
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
14621498
; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t
14631499
; CHECK-RV64-NEXT: addi a0, sp, 16
14641500
; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -763,7 +763,7 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
763763
; CHECK-NEXT: addi a1, a1, 16
764764
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
765765
; CHECK-NEXT: li a2, 16
766-
; CHECK-NEXT: vslidedown.vi v24, v0, 2
766+
; CHECK-NEXT: vslidedown.vi v7, v0, 2
767767
; CHECK-NEXT: mv a1, a0
768768
; CHECK-NEXT: bltu a0, a2, .LBB26_2
769769
; CHECK-NEXT: # %bb.1:
@@ -788,28 +788,27 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex
788788
; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t
789789
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
790790
; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t
791-
; CHECK-NEXT: addi a1, sp, 16
792-
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
793-
; CHECK-NEXT: vmv1r.v v0, v24
791+
; CHECK-NEXT: vmv1r.v v0, v7
794792
; CHECK-NEXT: csrr a1, vlenb
795793
; CHECK-NEXT: slli a1, a1, 3
796794
; CHECK-NEXT: add a1, sp, a1
797795
; CHECK-NEXT: addi a1, a1, 16
798796
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
799797
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
800-
; CHECK-NEXT: vfabs.v v8, v16, v0.t
798+
; CHECK-NEXT: vfabs.v v24, v16, v0.t
799+
; CHECK-NEXT: addi a0, sp, 16
800+
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
801+
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
801802
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
802-
; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t
803+
; CHECK-NEXT: vmflt.vf v7, v24, fa5, v0.t
803804
; CHECK-NEXT: fsrmi a0, 3
804-
; CHECK-NEXT: vmv1r.v v0, v24
805+
; CHECK-NEXT: vmv1r.v v0, v7
805806
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
806-
; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t
807+
; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t
807808
; CHECK-NEXT: fsrm a0
808-
; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t
809+
; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
809810
; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
810-
; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t
811-
; CHECK-NEXT: addi a0, sp, 16
812-
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
811+
; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
813812
; CHECK-NEXT: csrr a0, vlenb
814813
; CHECK-NEXT: slli a0, a0, 4
815814
; CHECK-NEXT: add sp, sp, a0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll

Lines changed: 102 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,35 +1797,61 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
17971797
; RV32-NEXT: addi a3, a3, 48
17981798
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
17991799
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
1800-
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
1800+
; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t
1801+
; RV32-NEXT: csrr a3, vlenb
1802+
; RV32-NEXT: slli a3, a3, 3
1803+
; RV32-NEXT: add a3, sp, a3
1804+
; RV32-NEXT: addi a3, a3, 48
1805+
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
18011806
; RV32-NEXT: csrr a3, vlenb
18021807
; RV32-NEXT: li a4, 40
18031808
; RV32-NEXT: mul a3, a3, a4
18041809
; RV32-NEXT: add a3, sp, a3
18051810
; RV32-NEXT: addi a3, a3, 48
18061811
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1812+
; RV32-NEXT: csrr a3, vlenb
1813+
; RV32-NEXT: slli a3, a3, 3
1814+
; RV32-NEXT: add a3, sp, a3
1815+
; RV32-NEXT: addi a3, a3, 48
1816+
; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
18071817
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
18081818
; RV32-NEXT: csrr a3, vlenb
18091819
; RV32-NEXT: li a4, 24
18101820
; RV32-NEXT: mul a3, a3, a4
18111821
; RV32-NEXT: add a3, sp, a3
18121822
; RV32-NEXT: addi a3, a3, 48
18131823
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1814-
; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
1815-
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
1824+
; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
18161825
; RV32-NEXT: csrr a3, vlenb
18171826
; RV32-NEXT: li a4, 24
18181827
; RV32-NEXT: mul a3, a3, a4
18191828
; RV32-NEXT: add a3, sp, a3
18201829
; RV32-NEXT: addi a3, a3, 48
18211830
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1822-
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
1831+
; RV32-NEXT: csrr a3, vlenb
1832+
; RV32-NEXT: li a4, 24
1833+
; RV32-NEXT: mul a3, a3, a4
1834+
; RV32-NEXT: add a3, sp, a3
1835+
; RV32-NEXT: addi a3, a3, 48
1836+
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
18231837
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
18241838
; RV32-NEXT: csrr a3, vlenb
1839+
; RV32-NEXT: slli a3, a3, 3
1840+
; RV32-NEXT: add a3, sp, a3
1841+
; RV32-NEXT: addi a3, a3, 48
1842+
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1843+
; RV32-NEXT: csrr a3, vlenb
18251844
; RV32-NEXT: li a4, 24
18261845
; RV32-NEXT: mul a3, a3, a4
18271846
; RV32-NEXT: add a3, sp, a3
18281847
; RV32-NEXT: addi a3, a3, 48
1848+
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1849+
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
1850+
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1851+
; RV32-NEXT: csrr a3, vlenb
1852+
; RV32-NEXT: slli a3, a3, 3
1853+
; RV32-NEXT: add a3, sp, a3
1854+
; RV32-NEXT: addi a3, a3, 48
18291855
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
18301856
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
18311857
; RV32-NEXT: csrr a3, vlenb
@@ -1891,29 +1917,45 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
18911917
; RV32-NEXT: addi a0, a0, 48
18921918
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
18931919
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
1894-
; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
1920+
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
1921+
; RV32-NEXT: csrr a0, vlenb
1922+
; RV32-NEXT: li a1, 40
1923+
; RV32-NEXT: mul a0, a0, a1
1924+
; RV32-NEXT: add a0, sp, a0
1925+
; RV32-NEXT: addi a0, a0, 48
1926+
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
18951927
; RV32-NEXT: csrr a0, vlenb
18961928
; RV32-NEXT: slli a0, a0, 5
18971929
; RV32-NEXT: add a0, sp, a0
18981930
; RV32-NEXT: addi a0, a0, 48
18991931
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1900-
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
19011932
; RV32-NEXT: csrr a0, vlenb
19021933
; RV32-NEXT: li a1, 40
19031934
; RV32-NEXT: mul a0, a0, a1
19041935
; RV32-NEXT: add a0, sp, a0
19051936
; RV32-NEXT: addi a0, a0, 48
1937+
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1938+
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1939+
; RV32-NEXT: csrr a0, vlenb
1940+
; RV32-NEXT: slli a0, a0, 4
1941+
; RV32-NEXT: add a0, sp, a0
1942+
; RV32-NEXT: addi a0, a0, 48
19061943
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1907-
; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
1944+
; RV32-NEXT: csrr a0, vlenb
1945+
; RV32-NEXT: li a1, 40
1946+
; RV32-NEXT: mul a0, a0, a1
1947+
; RV32-NEXT: add a0, sp, a0
1948+
; RV32-NEXT: addi a0, a0, 48
1949+
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1950+
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
19081951
; RV32-NEXT: csrr a0, vlenb
19091952
; RV32-NEXT: slli a0, a0, 5
19101953
; RV32-NEXT: add a0, sp, a0
19111954
; RV32-NEXT: addi a0, a0, 48
19121955
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
19131956
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
19141957
; RV32-NEXT: csrr a0, vlenb
1915-
; RV32-NEXT: li a1, 40
1916-
; RV32-NEXT: mul a0, a0, a1
1958+
; RV32-NEXT: slli a0, a0, 4
19171959
; RV32-NEXT: add a0, sp, a0
19181960
; RV32-NEXT: addi a0, a0, 48
19191961
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -3983,35 +4025,61 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
39834025
; RV32-NEXT: addi a3, a3, 48
39844026
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
39854027
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
3986-
; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
4028+
; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t
4029+
; RV32-NEXT: csrr a3, vlenb
4030+
; RV32-NEXT: slli a3, a3, 3
4031+
; RV32-NEXT: add a3, sp, a3
4032+
; RV32-NEXT: addi a3, a3, 48
4033+
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
39874034
; RV32-NEXT: csrr a3, vlenb
39884035
; RV32-NEXT: li a4, 40
39894036
; RV32-NEXT: mul a3, a3, a4
39904037
; RV32-NEXT: add a3, sp, a3
39914038
; RV32-NEXT: addi a3, a3, 48
39924039
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4040+
; RV32-NEXT: csrr a3, vlenb
4041+
; RV32-NEXT: slli a3, a3, 3
4042+
; RV32-NEXT: add a3, sp, a3
4043+
; RV32-NEXT: addi a3, a3, 48
4044+
; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
39934045
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
39944046
; RV32-NEXT: csrr a3, vlenb
39954047
; RV32-NEXT: li a4, 24
39964048
; RV32-NEXT: mul a3, a3, a4
39974049
; RV32-NEXT: add a3, sp, a3
39984050
; RV32-NEXT: addi a3, a3, 48
39994051
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4000-
; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
4001-
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
4052+
; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
40024053
; RV32-NEXT: csrr a3, vlenb
40034054
; RV32-NEXT: li a4, 24
40044055
; RV32-NEXT: mul a3, a3, a4
40054056
; RV32-NEXT: add a3, sp, a3
40064057
; RV32-NEXT: addi a3, a3, 48
40074058
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
4008-
; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
4059+
; RV32-NEXT: csrr a3, vlenb
4060+
; RV32-NEXT: li a4, 24
4061+
; RV32-NEXT: mul a3, a3, a4
4062+
; RV32-NEXT: add a3, sp, a3
4063+
; RV32-NEXT: addi a3, a3, 48
4064+
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
40094065
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
40104066
; RV32-NEXT: csrr a3, vlenb
4067+
; RV32-NEXT: slli a3, a3, 3
4068+
; RV32-NEXT: add a3, sp, a3
4069+
; RV32-NEXT: addi a3, a3, 48
4070+
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
4071+
; RV32-NEXT: csrr a3, vlenb
40114072
; RV32-NEXT: li a4, 24
40124073
; RV32-NEXT: mul a3, a3, a4
40134074
; RV32-NEXT: add a3, sp, a3
40144075
; RV32-NEXT: addi a3, a3, 48
4076+
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4077+
; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
4078+
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4079+
; RV32-NEXT: csrr a3, vlenb
4080+
; RV32-NEXT: slli a3, a3, 3
4081+
; RV32-NEXT: add a3, sp, a3
4082+
; RV32-NEXT: addi a3, a3, 48
40154083
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
40164084
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
40174085
; RV32-NEXT: csrr a3, vlenb
@@ -4077,29 +4145,45 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
40774145
; RV32-NEXT: addi a0, a0, 48
40784146
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
40794147
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
4080-
; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
4148+
; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
4149+
; RV32-NEXT: csrr a0, vlenb
4150+
; RV32-NEXT: li a1, 40
4151+
; RV32-NEXT: mul a0, a0, a1
4152+
; RV32-NEXT: add a0, sp, a0
4153+
; RV32-NEXT: addi a0, a0, 48
4154+
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
40814155
; RV32-NEXT: csrr a0, vlenb
40824156
; RV32-NEXT: slli a0, a0, 5
40834157
; RV32-NEXT: add a0, sp, a0
40844158
; RV32-NEXT: addi a0, a0, 48
40854159
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
4086-
; RV32-NEXT: vand.vv v16, v24, v8, v0.t
40874160
; RV32-NEXT: csrr a0, vlenb
40884161
; RV32-NEXT: li a1, 40
40894162
; RV32-NEXT: mul a0, a0, a1
40904163
; RV32-NEXT: add a0, sp, a0
40914164
; RV32-NEXT: addi a0, a0, 48
4165+
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
4166+
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4167+
; RV32-NEXT: csrr a0, vlenb
4168+
; RV32-NEXT: slli a0, a0, 4
4169+
; RV32-NEXT: add a0, sp, a0
4170+
; RV32-NEXT: addi a0, a0, 48
40924171
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
4093-
; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
4172+
; RV32-NEXT: csrr a0, vlenb
4173+
; RV32-NEXT: li a1, 40
4174+
; RV32-NEXT: mul a0, a0, a1
4175+
; RV32-NEXT: add a0, sp, a0
4176+
; RV32-NEXT: addi a0, a0, 48
4177+
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
4178+
; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
40944179
; RV32-NEXT: csrr a0, vlenb
40954180
; RV32-NEXT: slli a0, a0, 5
40964181
; RV32-NEXT: add a0, sp, a0
40974182
; RV32-NEXT: addi a0, a0, 48
40984183
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
40994184
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
41004185
; RV32-NEXT: csrr a0, vlenb
4101-
; RV32-NEXT: li a1, 40
4102-
; RV32-NEXT: mul a0, a0, a1
4186+
; RV32-NEXT: slli a0, a0, 4
41034187
; RV32-NEXT: add a0, sp, a0
41044188
; RV32-NEXT: addi a0, a0, 48
41054189
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload

0 commit comments

Comments
 (0)