
[RISCV] Fix vmerge.vvm/vmv.v.v getting folded into ops with mismatching EEW #101152

Merged: 4 commits, Jul 30, 2024
Changes from 3 commits
10 changes: 9 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3855,11 +3855,19 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// If we end up changing the VL or mask of True, then we need to make sure it
// doesn't raise any observable fp exceptions, since changing the active
// elements will affect how fflags is set.
if (TrueVL != VL || !IsMasked)
if (TrueVL != VL || !IsMasked) {
Collaborator:

I don't think this is specific to the case where we change VL or the mask. Even if the VL values are exactly equal, they describe a different number of bits when the EEWs differ, so folding away the vmerge.vvm or vmv.v.v is still illegal.

I think you can also use a much simpler check here: the VT of the True operand should equal the VT of the vmerge or vmv.v.v. (Really the respective operand, but we don't have widening or narrowing versions of either, so that's equivalent.)
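Something along these lines, as a rough sketch only (it reuses the surrounding function's True and N and assumes both carry their vector result type as result 0):

// Sketch of the suggested type check: bail out when True's result type
// differs from the vmerge/vmv.v.v result type, i.e. when the EEWs differ.
if (True.getSimpleValueType() != N->getSimpleValueType(0))
  return false;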

if (mayRaiseFPException(True.getNode()) &&
    !True->getFlags().hasNoFPExcept())
  return false;

// If the EEW of True is different from vmerge's SEW, then we cannot change
// the VL or mask.
if (Log2_64(True.getScalarValueSizeInBits()) !=
Contributor:

Maybe don't use a log here; we could do a shift left by log2sew instead. A shift is cheaper than a log, I think.
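An illustrative rewrite along those lines (sketch only, keeping the operand lookup from the patch and assuming the pseudo's SEW operand holds log2(SEW), as the existing comparison implies):

// Compare the EEW directly against 1 << Log2SEW instead of taking the log
// of the EEW; the SEW operand of the pseudo already stores the log2 value.
unsigned Log2SEW = N->getConstantOperandVal(
    RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1);
if (True.getScalarValueSizeInBits() != (1u << Log2SEW))
  return false;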

    N->getConstantOperandVal(
        RISCVII::getSEWOpNum(TII->get(N->getMachineOpcode())) - 1))
Contributor:

The opcode is known here, right? So I think we don't need to look up the SEW operand index from TSFlags; we can hardcode it.
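A hypothetical sketch of that idea; the operand index below is only a placeholder, not the actual operand number for these pseudos:

// If the operand layout of the matched vmerge.vvm/vmv.v.v pseudo is fixed,
// the SEW operand index can be a plain constant instead of a TSFlags lookup.
constexpr unsigned SEWOpIdx = 0; // placeholder index, not the real one
if (Log2_64(True.getScalarValueSizeInBits()) !=
    N->getConstantOperandVal(SEWOpIdx))
  return false;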

  return false;
}

SDLoc DL(N);

// From the preconditions we checked above, we know the mask and thus glue
40 changes: 40 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -1196,3 +1196,43 @@ define <vscale x 2 x i32> @true_mask_vmerge_implicit_passthru(<vscale x 2 x i32>
)
ret <vscale x 2 x i32> %b
}

define <vscale x 2 x i32> @unfoldable_mismatched_sew_mask(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, <vscale x 2 x i1> %mask, i64 %avl) {
; CHECK-LABEL: unfoldable_mismatched_sew_mask:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: vsetvli zero, a0, e32, m1, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 %avl)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %a.bitcast,
<vscale x 2 x i1> %mask,
i64 %avl
)
ret <vscale x 2 x i32> %b
}

define <vscale x 2 x i32> @unfoldable_mismatched_sew_avl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
; CHECK-LABEL: unfoldable_mismatched_sew_avl:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 5, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, i64 5)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
%b = call <vscale x 2 x i32> @llvm.riscv.vmerge.nxv2i32.nxv2i32(
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %passthru,
<vscale x 2 x i32> %a.bitcast,
<vscale x 2 x i1> splat (i1 true),
i64 3
)
ret <vscale x 2 x i32> %b
}
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/vmv.v.v-peephole.ll
@@ -180,3 +180,17 @@ define <vscale x 2 x i32> @unfoldable_vredsum(<vscale x 2 x i32> %passthru, <vsc
%b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a, iXLen 1)
ret <vscale x 2 x i32> %b
}

define <vscale x 2 x i32> @unfoldable_mismatched_sew_diff_vl(<vscale x 2 x i32> %passthru, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y) {
; CHECK-LABEL: unfoldable_mismatched_sew_diff_vl:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 6, e64, m1, ta, ma
; CHECK-NEXT: vadd.vv v9, v9, v10
; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
%a = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64(<vscale x 1 x i64> poison, <vscale x 1 x i64> %x, <vscale x 1 x i64> %y, iXLen 6)
%a.bitcast = bitcast <vscale x 1 x i64> %a to <vscale x 2 x i32>
%b = call <vscale x 2 x i32> @llvm.riscv.vmv.v.v.nxv2i32(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %a.bitcast, iXLen 3)
ret <vscale x 2 x i32> %b
}