diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3588ef46cadce..eddb2fbbd709c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -930,6 +930,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            VT, Custom);
       }
     }
+
+    setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
   }
 
   for (MVT VT : VecTupleVTs) {
@@ -1051,6 +1053,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                         ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                         ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                        VT, Custom);
+
+    setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
   };
 
   // Sets common extload/truncstore actions on RVV floating-point vector
@@ -1306,6 +1310,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
             {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
             Custom);
       }
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1434,6 +1440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
            ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
            ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
           VT, Custom);
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     // Custom-legalize bitcasts from fixed-length vectors to scalar types.
@@ -7082,6 +7090,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::MSTORE:
   case ISD::VP_STORE:
     return lowerMaskedStore(Op, DAG);
+  case ISD::VECTOR_COMPRESS:
+    return lowerVectorCompress(Op, DAG);
   case ISD::SELECT_CC: {
     // This occurs because we custom legalize SETGT and SETUGT for setcc. That
     // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
@@ -11225,6 +11235,36 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                  DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
 }
 
+SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  SDValue Mask = Op.getOperand(1);
+  SDValue Passthru = Op.getOperand(2);
+
+  MVT VT = Val.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    MVT MaskVT = getMaskTypeFor(ContainerVT);
+    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+    Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
+  }
+
+  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+  SDValue Res =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+                  DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+                  Passthru, Val, Mask, VL);
+
+  if (VT.isFixedLengthVector())
+    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+
+  return Res;
+}
+
 SDValue RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                               SelectionDAG &DAG) const {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index c374944795533..9191d9a9469b6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -955,6 +955,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                                SelectionDAG &DAG) const;
   SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
new file mode 100644
index 0000000000000..8f1ff7ed4a11e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-fp.ll
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> undef)
+  ret <1 x half> %ret
+}
+
+define <1 x half> @vector_compress_v1f16_passthru(<1 x half> %passthru, <1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> %passthru)
+  ret <1 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16(<2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> undef)
+  ret <2 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16_passthru(<2 x half> %passthru, <2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> %passthru)
+  ret <2 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16(<4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> undef)
+  ret <4 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16_passthru(<4 x half> %passthru, <4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16_passthru:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT:    vcompress.vm v8, v9, v0
+; CHECK-NEXT:    ret
+  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> %passthru)
+  ret <4 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16(<8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vcompress.vm v9, v8, v0
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+  %ret = call <8 x 
half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> undef) + ret <8 x half> %ret +} + +define <8 x half> @vector_compress_v8f16_passthru(<8 x half> %passthru, <8 x half> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> %passthru) + ret <8 x half> %ret +} + +define <1 x float> @vector_compress_v1f32(<1 x float> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> undef) + ret <1 x float> %ret +} + +define <1 x float> @vector_compress_v1f32_passthru(<1 x float> %passthru, <1 x float> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> %passthru) + ret <1 x float> %ret +} + +define <2 x float> @vector_compress_v2f32(<2 x float> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> undef) + ret <2 x float> %ret +} + +define <2 x float> @vector_compress_v2f32_passthru(<2 x float> %passthru, <2 x float> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> %passthru) + ret <2 x float> %ret +} + +define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef) + ret <4 x float> %ret +} + +define <4 x float> @vector_compress_v4f32_passthru(<4 x float> %passthru, <4 x float> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> %passthru) + ret <4 x float> %ret +} + +define <8 x float> @vector_compress_v8f32(<8 x float> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> undef) + ret <8 x float> %ret +} + +define <8 x float> 
@vector_compress_v8f32_passthru(<8 x float> %passthru, <8 x float> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> %passthru) + ret <8 x float> %ret +} + +define <1 x double> @vector_compress_v1f64(<1 x double> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> undef) + ret <1 x double> %ret +} + +define <1 x double> @vector_compress_v1f64_passthru(<1 x double> %passthru, <1 x double> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> %passthru) + ret <1 x double> %ret +} + +define <2 x double> @vector_compress_v2f64(<2 x double> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> undef) + ret <2 x double> %ret +} + +define <2 x double> @vector_compress_v2f64_passthru(<2 x double> %passthru, <2 x double> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> %passthru) + ret <2 x double> %ret +} + +define <4 x double> @vector_compress_v4f64(<4 x double> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> undef) + ret <4 x double> %ret +} + +define <4 x double> @vector_compress_v4f64_passthru(<4 x double> %passthru, <4 x double> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> %passthru) + ret <4 x double> %ret +} + +define <8 x double> @vector_compress_v8f64(<8 x double> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> undef) + ret <8 x double> %ret +} + +define <8 x double> @vector_compress_v8f64_passthru(<8 x double> %passthru, <8 x double> %v, <8 x i1> %mask) { +; 
CHECK-LABEL: vector_compress_v8f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> %passthru) + ret <8 x double> %ret +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll new file mode 100644 index 0000000000000..3952dc31838a2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-compress-int.ll @@ -0,0 +1,339 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s + +define <1 x i8> @vector_compress_v1i8(<1 x i8> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> undef) + ret <1 x i8> %ret +} + +define <1 x i8> @vector_compress_v1i8_passthru(<1 x i8> %passthru, <1 x i8> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x i8> @llvm.experimental.vector.compress.v1i8(<1 x i8> %v, <1 x i1> %mask, <1 x i8> %passthru) + ret <1 x i8> %ret +} + +define <2 x i8> @vector_compress_v2i8(<2 x i8> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> undef) + ret <2 x i8> %ret +} + +define <2 x i8> @vector_compress_v2i8_passthru(<2 x i8> %passthru, <2 x i8> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x i8> @llvm.experimental.vector.compress.v2i8(<2 x i8> %v, <2 x i1> %mask, <2 x i8> %passthru) + ret <2 x i8> %ret +} + +define <4 x i8> @vector_compress_v4i8(<4 x i8> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> undef) + ret <4 x i8> %ret +} + +define <4 x i8> @vector_compress_v4i8_passthru(<4 x i8> %passthru, <4 x i8> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <4 x i8> @llvm.experimental.vector.compress.v4i8(<4 x i8> %v, <4 x i1> %mask, <4 x i8> %passthru) + ret <4 x i8> %ret +} + +define <8 x i8> @vector_compress_v8i8(<8 x i8> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; 
CHECK-NEXT: ret + %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> undef) + ret <8 x i8> %ret +} + +define <8 x i8> @vector_compress_v8i8_passthru(<8 x i8> %passthru, <8 x i8> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <8 x i8> @llvm.experimental.vector.compress.v8i8(<8 x i8> %v, <8 x i1> %mask, <8 x i8> %passthru) + ret <8 x i8> %ret +} + +define <1 x i16> @vector_compress_v1i16(<1 x i16> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> undef) + ret <1 x i16> %ret +} + +define <1 x i16> @vector_compress_v1i16_passthru(<1 x i16> %passthru, <1 x i16> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x i16> @llvm.experimental.vector.compress.v1i16(<1 x i16> %v, <1 x i1> %mask, <1 x i16> %passthru) + ret <1 x i16> %ret +} + +define <2 x i16> @vector_compress_v2i16(<2 x i16> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x i16> @llvm.experimental.vector.compress.v2i16(<2 x i16> %v, <2 x i1> %mask, <2 x i16> undef) + ret <2 x i16> %ret +} + +define <2 x i16> @vector_compress_v2i16_passthru(<2 x i16> %passthru, <2 x i16> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x i16> @llvm.experimental.vector.compress.v2i16(<2 x i16> %v, <2 x i1> %mask, <2 x i16> %passthru) + ret <2 x i16> %ret +} + +define <4 x i16> @vector_compress_v4i16(<4 x i16> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <4 x i16> @llvm.experimental.vector.compress.v4i16(<4 x i16> %v, <4 x i1> %mask, <4 x i16> undef) + ret <4 x i16> %ret +} + +define <4 x i16> @vector_compress_v4i16_passthru(<4 x i16> %passthru, <4 x i16> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <4 x i16> @llvm.experimental.vector.compress.v4i16(<4 x i16> %v, <4 x i1> %mask, <4 x i16> %passthru) + ret <4 x i16> %ret +} + +define <8 x i16> @vector_compress_v8i16(<8 x i16> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <8 x i16> @llvm.experimental.vector.compress.v8i16(<8 x i16> %v, <8 x i1> %mask, <8 x i16> undef) + ret <8 x i16> %ret +} + +define <8 x i16> @vector_compress_v8i16_passthru(<8 x i16> %passthru, <8 x i16> %v, <8 x i1> %mask) { +; 
CHECK-LABEL: vector_compress_v8i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <8 x i16> @llvm.experimental.vector.compress.v8i16(<8 x i16> %v, <8 x i1> %mask, <8 x i16> %passthru) + ret <8 x i16> %ret +} + +define <1 x i32> @vector_compress_v1i32(<1 x i32> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x i32> @llvm.experimental.vector.compress.v1i32(<1 x i32> %v, <1 x i1> %mask, <1 x i32> undef) + ret <1 x i32> %ret +} + +define <1 x i32> @vector_compress_v1i32_passthru(<1 x i32> %passthru, <1 x i32> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e32, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x i32> @llvm.experimental.vector.compress.v1i32(<1 x i32> %v, <1 x i1> %mask, <1 x i32> %passthru) + ret <1 x i32> %ret +} + +define <2 x i32> @vector_compress_v2i32(<2 x i32> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x i32> @llvm.experimental.vector.compress.v2i32(<2 x i32> %v, <2 x i1> %mask, <2 x i32> undef) + ret <2 x i32> %ret +} + +define <2 x i32> @vector_compress_v2i32_passthru(<2 x i32> %passthru, <2 x i32> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x i32> @llvm.experimental.vector.compress.v2i32(<2 x i32> %v, <2 x i1> %mask, <2 x i32> %passthru) + ret <2 x i32> %ret +} + +define <4 x i32> @vector_compress_v4i32(<4 x i32> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> %v, <4 x i1> %mask, <4 x i32> undef) + ret <4 x i32> %ret +} + +define <4 x i32> @vector_compress_v4i32_passthru(<4 x i32> %passthru, <4 x i32> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <4 x i32> @llvm.experimental.vector.compress.v4i32(<4 x i32> %v, <4 x i1> %mask, <4 x i32> %passthru) + ret <4 x i32> %ret +} + +define <8 x i32> @vector_compress_v8i32(<8 x i32> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call <8 x i32> @llvm.experimental.vector.compress.v8i32(<8 x i32> %v, <8 x i1> %mask, <8 x i32> undef) + ret <8 x i32> %ret +} + +define <8 x i32> @vector_compress_v8i32_passthru(<8 x i32> %passthru, <8 x i32> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call <8 x i32> @llvm.experimental.vector.compress.v8i32(<8 x i32> 
%v, <8 x i1> %mask, <8 x i32> %passthru) + ret <8 x i32> %ret +} + +define <1 x i64> @vector_compress_v1i64(<1 x i64> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <1 x i64> @llvm.experimental.vector.compress.v1i64(<1 x i64> %v, <1 x i1> %mask, <1 x i64> undef) + ret <1 x i64> %ret +} + +define <1 x i64> @vector_compress_v1i64_passthru(<1 x i64> %passthru, <1 x i64> %v, <1 x i1> %mask) { +; CHECK-LABEL: vector_compress_v1i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <1 x i64> @llvm.experimental.vector.compress.v1i64(<1 x i64> %v, <1 x i1> %mask, <1 x i64> %passthru) + ret <1 x i64> %ret +} + +define <2 x i64> @vector_compress_v2i64(<2 x i64> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call <2 x i64> @llvm.experimental.vector.compress.v2i64(<2 x i64> %v, <2 x i1> %mask, <2 x i64> undef) + ret <2 x i64> %ret +} + +define <2 x i64> @vector_compress_v2i64_passthru(<2 x i64> %passthru, <2 x i64> %v, <2 x i1> %mask) { +; CHECK-LABEL: vector_compress_v2i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call <2 x i64> @llvm.experimental.vector.compress.v2i64(<2 x i64> %v, <2 x i1> %mask, <2 x i64> %passthru) + ret <2 x i64> %ret +} + +define <4 x i64> @vector_compress_v4i64(<4 x i64> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call <4 x i64> @llvm.experimental.vector.compress.v4i64(<4 x i64> %v, <4 x i1> %mask, <4 x i64> undef) + ret <4 x i64> %ret +} + +define <4 x i64> @vector_compress_v4i64_passthru(<4 x i64> %passthru, <4 x i64> %v, <4 x i1> %mask) { +; CHECK-LABEL: vector_compress_v4i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call <4 x i64> @llvm.experimental.vector.compress.v4i64(<4 x i64> %v, <4 x i1> %mask, <4 x i64> %passthru) + ret <4 x i64> %ret +} + +define <8 x i64> @vector_compress_v8i64(<8 x i64> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call <8 x i64> @llvm.experimental.vector.compress.v8i64(<8 x i64> %v, <8 x i1> %mask, <8 x i64> undef) + ret <8 x i64> %ret +} + +define <8 x i64> @vector_compress_v8i64_passthru(<8 x i64> %passthru, <8 x i64> %v, <8 x i1> %mask) { +; CHECK-LABEL: vector_compress_v8i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call <8 x i64> @llvm.experimental.vector.compress.v8i64(<8 x i64> %v, <8 x i1> %mask, <8 x i64> %passthru) + ret <8 x i64> %ret +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll b/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll new file mode 100644 index 0000000000000..85d72ad2fe9cb --- /dev/null +++ 
b/llvm/test/CodeGen/RISCV/rvv/vector-compress.ll @@ -0,0 +1,794 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -verify-machineinstrs -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh %s -o - | FileCheck %s + +; Vector compress for i8 type + +define @vector_compress_nxv1i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1i8_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i8( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2i8_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i8( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4i8_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i8( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8i8_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i8( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv16i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv16i8_passthru( %passthru, %data, %mask) { +; 
CHECK-LABEL: vector_compress_nxv16i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16i8( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv32i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv32i8_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32i8( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv64i8( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv64i8( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv64i8_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv64i8_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e8, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv64i8( %data, %mask, %passthru) + ret %ret +} + +; Vector compress for i16 type + +define @vector_compress_nxv1i16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1i16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2i16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2i16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4i16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4i16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i16_passthru: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8i16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8i16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv16i16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16i16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv16i16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16i16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv32i16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32i16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv32i16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32i16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32i16( %data, %mask, %passthru) + ret %ret +} + +; Vector compress for i32 type + +define @vector_compress_nxv1i32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1i32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2i32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2i32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma +; CHECK-NEXT: 
vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4i32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4i32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8i32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8i32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv16i32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16i32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv16i32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16i32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16i32( %data, %mask, %passthru) + ret %ret +} + +; Vector compress for i64 type + +define @vector_compress_nxv1i64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1i64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1i64( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2i64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2i64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2i64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call 
@llvm.experimental.vector.compress.nxv2i64( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4i64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4i64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4i64( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8i64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8i64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8i64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8i64( %data, %mask, %passthru) + ret %ret +} + +; Vector compress for f16 type + +define @vector_compress_nxv1f16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1f16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1f16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1f16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2f16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2f16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2f16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2f16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4f16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4f16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4f16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4f16( %data, %mask, %passthru) + ret 
%ret +} + +define @vector_compress_nxv8f16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8f16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8f16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8f16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv16f16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16f16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv16f16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16f16( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv32f16( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32f16( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv32f16_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv32f16_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv32f16( %data, %mask, %passthru) + ret %ret +} + +; Vector compress for f32 type + +define @vector_compress_nxv1f32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1f32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1f32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1f32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2f32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2f32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2f32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2f32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4f32( %data, %mask) { +; 
CHECK-LABEL: vector_compress_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4f32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4f32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4f32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8f32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8f32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8f32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8f32( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv16f32( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16f32( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv16f32_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv16f32_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv16f32( %data, %mask, %passthru) + ret %ret +} + +; Vector compress for f64 type + +define @vector_compress_nxv1f64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vcompress.vm v9, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1f64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv1f64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv1f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, tu, ma +; CHECK-NEXT: vcompress.vm v8, v9, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv1f64( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv2f64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vcompress.vm v10, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2f64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv2f64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv2f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, ma +; CHECK-NEXT: vcompress.vm v8, v10, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv2f64( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv4f64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4f64: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vcompress.vm v12, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4f64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv4f64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv4f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, ma +; CHECK-NEXT: vcompress.vm v8, v12, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv4f64( %data, %mask, %passthru) + ret %ret +} + +define @vector_compress_nxv8f64( %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8f64( %data, %mask, undef) + ret %ret +} + +define @vector_compress_nxv8f64_passthru( %passthru, %data, %mask) { +; CHECK-LABEL: vector_compress_nxv8f64_passthru: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, ma +; CHECK-NEXT: vcompress.vm v8, v16, v0 +; CHECK-NEXT: ret + %ret = call @llvm.experimental.vector.compress.nxv8f64( %data, %mask, %passthru) + ret %ret +}