@@ -1797,35 +1797,61 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
1797
1797
; RV32-NEXT: addi a3, a3, 48
1798
1798
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1799
1799
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
1800
- ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
1800
+ ; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t
1801
+ ; RV32-NEXT: csrr a3, vlenb
1802
+ ; RV32-NEXT: slli a3, a3, 3
1803
+ ; RV32-NEXT: add a3, sp, a3
1804
+ ; RV32-NEXT: addi a3, a3, 48
1805
+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1801
1806
; RV32-NEXT: csrr a3, vlenb
1802
1807
; RV32-NEXT: li a4, 40
1803
1808
; RV32-NEXT: mul a3, a3, a4
1804
1809
; RV32-NEXT: add a3, sp, a3
1805
1810
; RV32-NEXT: addi a3, a3, 48
1806
1811
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1812
+ ; RV32-NEXT: csrr a3, vlenb
1813
+ ; RV32-NEXT: slli a3, a3, 3
1814
+ ; RV32-NEXT: add a3, sp, a3
1815
+ ; RV32-NEXT: addi a3, a3, 48
1816
+ ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
1807
1817
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
1808
1818
; RV32-NEXT: csrr a3, vlenb
1809
1819
; RV32-NEXT: li a4, 24
1810
1820
; RV32-NEXT: mul a3, a3, a4
1811
1821
; RV32-NEXT: add a3, sp, a3
1812
1822
; RV32-NEXT: addi a3, a3, 48
1813
1823
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1814
- ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
1815
- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
1824
+ ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
1816
1825
; RV32-NEXT: csrr a3, vlenb
1817
1826
; RV32-NEXT: li a4, 24
1818
1827
; RV32-NEXT: mul a3, a3, a4
1819
1828
; RV32-NEXT: add a3, sp, a3
1820
1829
; RV32-NEXT: addi a3, a3, 48
1821
1830
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1822
- ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
1831
+ ; RV32-NEXT: csrr a3, vlenb
1832
+ ; RV32-NEXT: li a4, 24
1833
+ ; RV32-NEXT: mul a3, a3, a4
1834
+ ; RV32-NEXT: add a3, sp, a3
1835
+ ; RV32-NEXT: addi a3, a3, 48
1836
+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1823
1837
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1824
1838
; RV32-NEXT: csrr a3, vlenb
1839
+ ; RV32-NEXT: slli a3, a3, 3
1840
+ ; RV32-NEXT: add a3, sp, a3
1841
+ ; RV32-NEXT: addi a3, a3, 48
1842
+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
1843
+ ; RV32-NEXT: csrr a3, vlenb
1825
1844
; RV32-NEXT: li a4, 24
1826
1845
; RV32-NEXT: mul a3, a3, a4
1827
1846
; RV32-NEXT: add a3, sp, a3
1828
1847
; RV32-NEXT: addi a3, a3, 48
1848
+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
1849
+ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
1850
+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1851
+ ; RV32-NEXT: csrr a3, vlenb
1852
+ ; RV32-NEXT: slli a3, a3, 3
1853
+ ; RV32-NEXT: add a3, sp, a3
1854
+ ; RV32-NEXT: addi a3, a3, 48
1829
1855
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
1830
1856
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
1831
1857
; RV32-NEXT: csrr a3, vlenb
@@ -1891,29 +1917,45 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
1891
1917
; RV32-NEXT: addi a0, a0, 48
1892
1918
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1893
1919
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
1894
- ; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
1920
+ ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
1921
+ ; RV32-NEXT: csrr a0, vlenb
1922
+ ; RV32-NEXT: li a1, 40
1923
+ ; RV32-NEXT: mul a0, a0, a1
1924
+ ; RV32-NEXT: add a0, sp, a0
1925
+ ; RV32-NEXT: addi a0, a0, 48
1926
+ ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
1895
1927
; RV32-NEXT: csrr a0, vlenb
1896
1928
; RV32-NEXT: slli a0, a0, 5
1897
1929
; RV32-NEXT: add a0, sp, a0
1898
1930
; RV32-NEXT: addi a0, a0, 48
1899
1931
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1900
- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
1901
1932
; RV32-NEXT: csrr a0, vlenb
1902
1933
; RV32-NEXT: li a1, 40
1903
1934
; RV32-NEXT: mul a0, a0, a1
1904
1935
; RV32-NEXT: add a0, sp, a0
1905
1936
; RV32-NEXT: addi a0, a0, 48
1937
+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1938
+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
1939
+ ; RV32-NEXT: csrr a0, vlenb
1940
+ ; RV32-NEXT: slli a0, a0, 4
1941
+ ; RV32-NEXT: add a0, sp, a0
1942
+ ; RV32-NEXT: addi a0, a0, 48
1906
1943
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
1907
- ; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
1944
+ ; RV32-NEXT: csrr a0, vlenb
1945
+ ; RV32-NEXT: li a1, 40
1946
+ ; RV32-NEXT: mul a0, a0, a1
1947
+ ; RV32-NEXT: add a0, sp, a0
1948
+ ; RV32-NEXT: addi a0, a0, 48
1949
+ ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
1950
+ ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
1908
1951
; RV32-NEXT: csrr a0, vlenb
1909
1952
; RV32-NEXT: slli a0, a0, 5
1910
1953
; RV32-NEXT: add a0, sp, a0
1911
1954
; RV32-NEXT: addi a0, a0, 48
1912
1955
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
1913
1956
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
1914
1957
; RV32-NEXT: csrr a0, vlenb
1915
- ; RV32-NEXT: li a1, 40
1916
- ; RV32-NEXT: mul a0, a0, a1
1958
+ ; RV32-NEXT: slli a0, a0, 4
1917
1959
; RV32-NEXT: add a0, sp, a0
1918
1960
; RV32-NEXT: addi a0, a0, 48
1919
1961
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -3983,35 +4025,61 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
3983
4025
; RV32-NEXT: addi a3, a3, 48
3984
4026
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
3985
4027
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
3986
- ; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t
4028
+ ; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t
4029
+ ; RV32-NEXT: csrr a3, vlenb
4030
+ ; RV32-NEXT: slli a3, a3, 3
4031
+ ; RV32-NEXT: add a3, sp, a3
4032
+ ; RV32-NEXT: addi a3, a3, 48
4033
+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
3987
4034
; RV32-NEXT: csrr a3, vlenb
3988
4035
; RV32-NEXT: li a4, 40
3989
4036
; RV32-NEXT: mul a3, a3, a4
3990
4037
; RV32-NEXT: add a3, sp, a3
3991
4038
; RV32-NEXT: addi a3, a3, 48
3992
4039
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4040
+ ; RV32-NEXT: csrr a3, vlenb
4041
+ ; RV32-NEXT: slli a3, a3, 3
4042
+ ; RV32-NEXT: add a3, sp, a3
4043
+ ; RV32-NEXT: addi a3, a3, 48
4044
+ ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
3993
4045
; RV32-NEXT: vand.vv v24, v24, v16, v0.t
3994
4046
; RV32-NEXT: csrr a3, vlenb
3995
4047
; RV32-NEXT: li a4, 24
3996
4048
; RV32-NEXT: mul a3, a3, a4
3997
4049
; RV32-NEXT: add a3, sp, a3
3998
4050
; RV32-NEXT: addi a3, a3, 48
3999
4051
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4000
- ; RV32-NEXT: vsub.vv v24, v16, v24, v0.t
4001
- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
4052
+ ; RV32-NEXT: vsub.vv v16, v16, v24, v0.t
4002
4053
; RV32-NEXT: csrr a3, vlenb
4003
4054
; RV32-NEXT: li a4, 24
4004
4055
; RV32-NEXT: mul a3, a3, a4
4005
4056
; RV32-NEXT: add a3, sp, a3
4006
4057
; RV32-NEXT: addi a3, a3, 48
4007
4058
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
4008
- ; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t
4059
+ ; RV32-NEXT: csrr a3, vlenb
4060
+ ; RV32-NEXT: li a4, 24
4061
+ ; RV32-NEXT: mul a3, a3, a4
4062
+ ; RV32-NEXT: add a3, sp, a3
4063
+ ; RV32-NEXT: addi a3, a3, 48
4064
+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4009
4065
; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4010
4066
; RV32-NEXT: csrr a3, vlenb
4067
+ ; RV32-NEXT: slli a3, a3, 3
4068
+ ; RV32-NEXT: add a3, sp, a3
4069
+ ; RV32-NEXT: addi a3, a3, 48
4070
+ ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
4071
+ ; RV32-NEXT: csrr a3, vlenb
4011
4072
; RV32-NEXT: li a4, 24
4012
4073
; RV32-NEXT: mul a3, a3, a4
4013
4074
; RV32-NEXT: add a3, sp, a3
4014
4075
; RV32-NEXT: addi a3, a3, 48
4076
+ ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
4077
+ ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t
4078
+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4079
+ ; RV32-NEXT: csrr a3, vlenb
4080
+ ; RV32-NEXT: slli a3, a3, 3
4081
+ ; RV32-NEXT: add a3, sp, a3
4082
+ ; RV32-NEXT: addi a3, a3, 48
4015
4083
; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
4016
4084
; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
4017
4085
; RV32-NEXT: csrr a3, vlenb
@@ -4077,29 +4145,45 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
4077
4145
; RV32-NEXT: addi a0, a0, 48
4078
4146
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
4079
4147
; RV32-NEXT: vand.vv v16, v24, v16, v0.t
4080
- ; RV32-NEXT: vsub.vv v24, v8, v16, v0.t
4148
+ ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
4149
+ ; RV32-NEXT: csrr a0, vlenb
4150
+ ; RV32-NEXT: li a1, 40
4151
+ ; RV32-NEXT: mul a0, a0, a1
4152
+ ; RV32-NEXT: add a0, sp, a0
4153
+ ; RV32-NEXT: addi a0, a0, 48
4154
+ ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
4081
4155
; RV32-NEXT: csrr a0, vlenb
4082
4156
; RV32-NEXT: slli a0, a0, 5
4083
4157
; RV32-NEXT: add a0, sp, a0
4084
4158
; RV32-NEXT: addi a0, a0, 48
4085
4159
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
4086
- ; RV32-NEXT: vand.vv v16, v24, v8, v0.t
4087
4160
; RV32-NEXT: csrr a0, vlenb
4088
4161
; RV32-NEXT: li a1, 40
4089
4162
; RV32-NEXT: mul a0, a0, a1
4090
4163
; RV32-NEXT: add a0, sp, a0
4091
4164
; RV32-NEXT: addi a0, a0, 48
4165
+ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
4166
+ ; RV32-NEXT: vand.vv v16, v16, v8, v0.t
4167
+ ; RV32-NEXT: csrr a0, vlenb
4168
+ ; RV32-NEXT: slli a0, a0, 4
4169
+ ; RV32-NEXT: add a0, sp, a0
4170
+ ; RV32-NEXT: addi a0, a0, 48
4092
4171
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
4093
- ; RV32-NEXT: vsrl.vi v8, v24, 2, v0.t
4172
+ ; RV32-NEXT: csrr a0, vlenb
4173
+ ; RV32-NEXT: li a1, 40
4174
+ ; RV32-NEXT: mul a0, a0, a1
4175
+ ; RV32-NEXT: add a0, sp, a0
4176
+ ; RV32-NEXT: addi a0, a0, 48
4177
+ ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
4178
+ ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
4094
4179
; RV32-NEXT: csrr a0, vlenb
4095
4180
; RV32-NEXT: slli a0, a0, 5
4096
4181
; RV32-NEXT: add a0, sp, a0
4097
4182
; RV32-NEXT: addi a0, a0, 48
4098
4183
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
4099
4184
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
4100
4185
; RV32-NEXT: csrr a0, vlenb
4101
- ; RV32-NEXT: li a1, 40
4102
- ; RV32-NEXT: mul a0, a0, a1
4186
+ ; RV32-NEXT: slli a0, a0, 4
4103
4187
; RV32-NEXT: add a0, sp, a0
4104
4188
; RV32-NEXT: addi a0, a0, 48
4105
4189
; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
0 commit comments