[RISCV] Fix illegal build_vector when lowering double id buildvec on RV32 #67017

Conversation
@llvm/pr-subscribers-backend-risc-v

Changes

When lowering a constant build_vector sequence of doubles on RV32, if the addend wasn't zero, or the step/denominator wasn't one, it would crash trying to lower an illegal build_vector of <n x i64> with i32 operands, e.g.:

t15: v2i64 = BUILD_VECTOR Constant:i32<1>, Constant:i32<1>

This patch fixes this by lowering the splats with SelectionDAG::getConstant with the vector type, which handles making it legal via splat_vector_parts.

Full diff: https://github.com/llvm/llvm-project/pull/67017.diff

4 Files Affected:
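For reference, one of the new tests below doubles as a minimal reproducer: a constant <2 x double> id sequence with a non-zero addend, which previously crashed during lowering when compiled for RV32 with the V extension (e.g. llc -mtriple=riscv32 -mattr=+v):

; Minimal reproducer, taken from the tests added in this patch. Lowering
; picks the vid.v + vadd.vi path, whose splat addend used to be built as
; an illegal v2i64 BUILD_VECTOR of i32 constants on RV32.
define <2 x double> @vid_addend1_v2f64() {
  ret <2 x double> <double 1.0, double 2.0>
}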
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d176fcaf54c2db0..7f41b95580900b5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3379,18 +3379,16 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
VID = convertFromScalableVector(VIDVT, VID, DAG, Subtarget);
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
+ SDValue SplatStep = DAG.getConstant(SplatStepVal, DL, VIDVT);
VID = DAG.getNode(StepOpcode, DL, VIDVT, VID, SplatStep);
}
if (StepDenominator != 1) {
- SDValue SplatStep = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
+ SDValue SplatStep =
+ DAG.getConstant(Log2_64(StepDenominator), DL, VIDVT);
VID = DAG.getNode(ISD::SRL, DL, VIDVT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
- SDValue SplatAddend = DAG.getSplatBuildVector(
- VIDVT, DL, DAG.getConstant(Addend, DL, XLenVT));
+ SDValue SplatAddend = DAG.getConstant(Addend, DL, VIDVT);
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VIDVT, SplatAddend,
VID);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index a2fde2addc14e66..0c1ad4c1311b596 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1076,3 +1076,132 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double
%v31 = insertelement <32 x double> %v30, double %e31, i64 31
ret <32 x double> %v31
}
+
+define <2 x half> @vid_v2f16() {
+; CHECK-LABEL: vid_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x half> <half 0.0, half 1.0>
+}
+
+define <2 x half> @vid_addend1_v2f16() {
+; CHECK-LABEL: vid_addend1_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vadd.vi v8, v8, 1
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x half> <half 1.0, half 2.0>
+}
+
+define <2 x half> @vid_denominator2_v2f16() {
+; CHECK-LABEL: vid_denominator2_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI26_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI26_0)
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+ ret <2 x half> <half 0.5, half 1.0>
+}
+
+define <2 x half> @vid_step2_v2f16() {
+; CHECK-LABEL: vid_step2_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x half> <half 0.0, half 2.0>
+}
+
+define <2 x float> @vid_v2f32() {
+; CHECK-LABEL: vid_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x float> <float 0.0, float 1.0>
+}
+
+define <2 x float> @vid_addend1_v2f32() {
+; CHECK-LABEL: vid_addend1_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vadd.vi v8, v8, 1
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x float> <float 1.0, float 2.0>
+}
+
+define <2 x float> @vid_denominator2_v2f32() {
+; CHECK-LABEL: vid_denominator2_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI30_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0)
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: ret
+ ret <2 x float> <float 0.5, float 1.0>
+}
+
+define <2 x float> @vid_step2_v2f32() {
+; CHECK-LABEL: vid_step2_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x float> <float 0.0, float 2.0>
+}
+
+define <2 x double> @vid_v2f64() {
+; CHECK-LABEL: vid_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x double> <double 0.0, double 1.0>
+}
+
+define <2 x double> @vid_addend1_v2f64() {
+; CHECK-LABEL: vid_addend1_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vadd.vi v8, v8, 1
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x double> <double 1.0, double 2.0>
+}
+
+define <2 x double> @vid_denominator2_v2f64() {
+; CHECK-LABEL: vid_denominator2_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, %hi(.LCPI34_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI34_0)
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: ret
+ ret <2 x double> <double 0.5, double 1.0>
+}
+
+define <2 x double> @vid_step2_v2f64() {
+; CHECK-LABEL: vid_step2_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: vadd.vv v8, v8, v8
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: ret
+ ret <2 x double> <double 0.0, double 2.0>
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 79947ca4cdf0696..61b8bd76df211a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -259,25 +259,40 @@ define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: li a0, -3
+; CHECK-NEXT: li a0, 253
; CHECK-NEXT: vmadd.vx v8, a0, v9
; CHECK-NEXT: ret
ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
}
define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
-; CHECK-LABEL: buildvec_vid_stepn3_addn3_v4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v8, -3
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: li a4, -3
-; CHECK-NEXT: vmadd.vx v9, a4, v8
-; CHECK-NEXT: vse32.v v9, (a0)
-; CHECK-NEXT: vse32.v v9, (a1)
-; CHECK-NEXT: vse32.v v9, (a2)
-; CHECK-NEXT: vse32.v v9, (a3)
-; CHECK-NEXT: ret
+; RV32-LABEL: buildvec_vid_stepn3_addn3_v4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vmv.v.i v8, -3
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: li a4, -3
+; RV32-NEXT: vmadd.vx v9, a4, v8
+; RV32-NEXT: vse32.v v9, (a0)
+; RV32-NEXT: vse32.v v9, (a1)
+; RV32-NEXT: vse32.v v9, (a2)
+; RV32-NEXT: vse32.v v9, (a3)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: buildvec_vid_stepn3_addn3_v4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vmv.v.i v8, -3
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: li a4, 1
+; RV64-NEXT: slli a4, a4, 32
+; RV64-NEXT: addi a4, a4, -3
+; RV64-NEXT: vmadd.vx v9, a4, v8
+; RV64-NEXT: vse32.v v9, (a0)
+; RV64-NEXT: vse32.v v9, (a1)
+; RV64-NEXT: vse32.v v9, (a2)
+; RV64-NEXT: vse32.v v9, (a3)
+; RV64-NEXT: ret
store <4 x i32> <i32 -3, i32 -6, i32 -9, i32 -12>, ptr %z0
store <4 x i32> <i32 undef, i32 -6, i32 -9, i32 -12>, ptr %z1
store <4 x i32> <i32 undef, i32 undef, i32 -9, i32 -12>, ptr %z2
 store <4 x i32> <i32 undef, i32 undef, i32 undef, i32 -12>, ptr %z3
 ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index dbf7dfbcab49cb1..46d8493b9255841 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1461,29 +1461,55 @@ define void @mulhs_v8i16(ptr %x) {
}
define void @mulhs_v6i16(ptr %x) {
-; CHECK-LABEL: mulhs_v6i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 7
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: li a1, -14
-; CHECK-NEXT: vmadd.vx v10, a1, v9
-; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vdiv.vv v9, v9, v10
-; CHECK-NEXT: vmv.v.i v0, 6
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v10, -7
-; CHECK-NEXT: vmerge.vim v10, v10, 7, v0
-; CHECK-NEXT: vdiv.vv v8, v8, v10
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
-; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT: vse16.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: mulhs_v6i16:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; RV32-NEXT: vle16.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vmv.v.i v9, 7
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: lui a1, 16
+; RV32-NEXT: addi a1, a1, -14
+; RV32-NEXT: vmadd.vx v10, a1, v9
+; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v9, v8, 4
+; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT: vdiv.vv v9, v9, v10
+; RV32-NEXT: vmv.v.i v0, 6
+; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV32-NEXT: vmv.v.i v10, -7
+; RV32-NEXT: vmerge.vim v10, v10, 7, v0
+; RV32-NEXT: vdiv.vv v8, v8, v10
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vslideup.vi v8, v9, 4
+; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; RV32-NEXT: vse16.v v8, (a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: mulhs_v6i16:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; RV64-NEXT: vle16.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vmv.v.i v9, 7
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: lui a1, 16
+; RV64-NEXT: addiw a1, a1, -14
+; RV64-NEXT: vmadd.vx v10, a1, v9
+; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; RV64-NEXT: vslidedown.vi v9, v8, 4
+; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT: vdiv.vv v9, v9, v10
+; RV64-NEXT: vmv.v.i v0, 6
+; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; RV64-NEXT: vmv.v.i v10, -7
+; RV64-NEXT: vmerge.vim v10, v10, 7, v0
+; RV64-NEXT: vdiv.vv v8, v8, v10
+; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT: vslideup.vi v8, v9, 4
+; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; RV64-NEXT: vse16.v v8, (a0)
+; RV64-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
store <6 x i16> %b, ptr %x
 ret void
}
@@ -259,25 +259,40 @@ define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vmv.v.i v9, 3
; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: li a0, -3
+; CHECK-NEXT: li a0, 253
This and the regressions below are caused by SelectionDAG::getConstant not sign-extending the splat value. I'll try to fix this in a separate PR.
#67027 should fix this.
Talked to Luke about this offline. My summary is that while we are generating the zero-extended rather than sign-extended constant for the i8, we believe hasAllNBitUsers handles this case. Our theory is that the difference in the LI immediate comes down to the fact that using a different constant on the LI isn't profitable (in either direction), i.e. no rooting transform. This seems backed up by the fact that we don't see any other test differences which are material.
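A self-contained sketch (plain C++, not LLVM code; purely illustrative) of why the materialized immediate can be 253 rather than -3 even though both encode the same i8 bit pattern:

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Splat = -3;
  // Truncate the splat value to the 8-bit element width: 0xFD.
  uint8_t Elt = static_cast<uint8_t>(Splat);
  // Zero-extending the element back to register width gives 253...
  int64_t ZExt = Elt;
  // ...while sign-extending gives -3. The low 8 bits are identical, so a
  // consumer that only reads 8 bits (cf. hasAllNBitUsers) cannot tell the
  // two li materializations apart.
  int64_t SExt = static_cast<int8_t>(Elt);
  std::printf("zext=%lld sext=%lld low8 equal=%d\n", (long long)ZExt,
              (long long)SExt,
              static_cast<uint8_t>(ZExt) == static_cast<uint8_t>(SExt));
}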
(Branch force-pushed from 2e37f7e to ec7eba4, then to 4c4f2d0; the commit message matches the description below.)
LGTM
When lowering a constant build_vector sequence of doubles on RV32, if the addend wasn't zero, or the step/denominator wasn't one, it would crash trying to emit an illegal build_vector of <n x i64> with i32 operands, e.g.:

t15: v2i64 = BUILD_VECTOR Constant:i32<1>, Constant:i32<1>

This patch fixes this by lowering the splats with SelectionDAG::getConstant with the vector type, which handles making it legal via splat_vector_parts.
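As a sketch of the node shapes involved (hand-written for illustration; the exact printed form of the legal node is an assumption, not output from this patch):

Before: the splat was built as a BUILD_VECTOR of XLenVT (i32) constants,
which is illegal for a 64-bit element type on RV32:
  t15: v2i64 = BUILD_VECTOR Constant:i32<1>, Constant:i32<1>

After: getConstant with the vector type lets legalization emit the splat
as low/high i32 parts instead:
  t15: v2i64 = splat_vector_parts Constant:i32<1>, Constant:i32<0>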