diff --git a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
index 2089f5dda6fe5..4c424440aa972 100644
--- a/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFoldMasks.cpp
@@ -50,10 +50,7 @@ class RISCVFoldMasks : public MachineFunctionPass {
   bool convertToUnmasked(MachineInstr &MI) const;
   bool convertVMergeToVMv(MachineInstr &MI) const;
 
-  bool isAllOnesMask(const MachineInstr *MaskDef) const;
-
-  /// Maps uses of V0 to the corresponding def of V0.
-  DenseMap<const MachineInstr *, const MachineInstr *> V0Defs;
+  bool isAllOnesMask(const MachineOperand &MaskOp) const;
 };
 
 } // namespace
@@ -62,12 +59,22 @@ char RISCVFoldMasks::ID = 0;
 
 INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
 
-bool RISCVFoldMasks::isAllOnesMask(const MachineInstr *MaskDef) const {
-  assert(MaskDef && MaskDef->isCopy() &&
-         MaskDef->getOperand(0).getReg() == RISCV::V0);
+bool RISCVFoldMasks::isAllOnesMask(const MachineOperand &MaskOp) const {
+  if (!MaskOp.isReg())
+    return false;
+
+  Register MaskReg = MaskOp.getReg();
+  if (!MaskReg.isVirtual())
+    return false;
+
+  MachineInstr *MaskDef = MRI->getVRegDef(MaskReg);
+  if (!MaskDef || !MaskDef->isCopy())
+    return false;
+
   Register SrcReg = TRI->lookThruCopyLike(MaskDef->getOperand(1).getReg(), MRI);
   if (!SrcReg.isVirtual())
     return false;
+
   MaskDef = MRI->getVRegDef(SrcReg);
   if (!MaskDef)
     return false;
@@ -116,8 +123,7 @@ bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI) const {
           TRI->lookThruCopyLike(FalseReg, MRI))
     return false;
 
-  assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
-  if (!isAllOnesMask(V0Defs.lookup(&MI)))
+  if (!isAllOnesMask(MI.getOperand(4)))
     return false;
 
   MI.setDesc(TII->get(NewOpc));
@@ -140,7 +146,9 @@ bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI) const {
   if (!I)
     return false;
 
-  if (!isAllOnesMask(V0Defs.lookup(&MI)))
+  // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
+  unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
+  if (!isAllOnesMask(MI.getOperand(MaskOpIdx)))
     return false;
 
   // There are two classes of pseudos in the table - compares and
@@ -160,9 +168,6 @@ bool RISCVFoldMasks::convertToUnmasked(MachineInstr &MI) const {
   (void)HasPolicyOp;
 
   MI.setDesc(MCID);
-
-  // TODO: Increment all MaskOpIdxs in tablegen by num of explicit defs?
-  unsigned MaskOpIdx = I->MaskOpIdx + MI.getNumExplicitDefs();
   MI.removeOperand(MaskOpIdx);
 
   // The unmasked pseudo will no longer be constrained to the vrnov0 reg class,
@@ -193,24 +198,6 @@ bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
 
   bool Changed = false;
 
-  // Masked pseudos coming out of isel will have their mask operand in the form:
-  //
-  //   $v0:vr = COPY %mask:vr
-  //   %x:vr = Pseudo_MASK %a:vr, %b:br, $v0:vr
-  //
-  // Because $v0 isn't in SSA, keep track of its definition at each use so we
-  // can check mask operands.
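
// Sketch (annotation, not part of the patch): with the mask kept in SSA as a
// virtual register constrained to the vmv0 register class by the updated isel
// patterns, a masked pseudo now looks roughly like
//
//   %mask:vmv0 = COPY %m:vr
//   %x:vrnov0 = PseudoFOO_MASK %passthru:vrnov0, %a:vr, %mask:vmv0, ...
//
// so checking a mask is an ordinary use-def walk instead of a lookup in the
// V0Defs side table that is being deleted below. A minimal, hypothetical
// equivalent of the new isAllOnesMask() above (PseudoFOO_MASK and
// maskIsAllOnes are illustrative names, and only a few VMSET variants are
// listed):

static bool maskIsAllOnes(const MachineOperand &MO,
                          const MachineRegisterInfo &MRI) {
  if (!MO.isReg() || !MO.getReg().isVirtual())
    return false;
  // Look through the COPY chain isel emits to satisfy the vmv0 constraint.
  const MachineInstr *Def = MRI.getVRegDef(MO.getReg());
  while (Def && Def->isFullCopy() && Def->getOperand(1).getReg().isVirtual())
    Def = MRI.getVRegDef(Def->getOperand(1).getReg());
  if (!Def)
    return false;
  switch (Def->getOpcode()) {
  case RISCV::PseudoVMSET_M_B1:
  case RISCV::PseudoVMSET_M_B8:
  case RISCV::PseudoVMSET_M_B64: // ...and the remaining SEW variants.
    return true;
  default:
    return false;
  }
}
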
-  for (const MachineBasicBlock &MBB : MF) {
-    const MachineInstr *CurrentV0Def = nullptr;
-    for (const MachineInstr &MI : MBB) {
-      if (MI.readsRegister(RISCV::V0, TRI))
-        V0Defs[&MI] = CurrentV0Def;
-
-      if (MI.definesRegister(RISCV::V0, TRI))
-        CurrentV0Def = &MI;
-    }
-  }
-
   for (MachineBasicBlock &MBB : MF) {
     for (MachineInstr &MI : MBB) {
       Changed |= convertToUnmasked(MI);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f99dc0b857636..728117323bcef 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -296,7 +296,6 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
     bool IsMasked, bool IsStridedOrIndexed, SmallVectorImpl<SDValue> &Operands,
     bool IsLoad, MVT *IndexVT) {
   SDValue Chain = Node->getOperand(0);
-  SDValue Glue;
 
   Operands.push_back(Node->getOperand(CurOp++)); // Base pointer.
 
@@ -307,11 +306,8 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
   }
 
   if (IsMasked) {
-    // Mask needs to be copied to V0.
     SDValue Mask = Node->getOperand(CurOp++);
-    Chain = CurDAG->getCopyToReg(Chain, DL, RISCV::V0, Mask, SDValue());
-    Glue = Chain.getValue(1);
-    Operands.push_back(CurDAG->getRegister(RISCV::V0, Mask.getValueType()));
+    Operands.push_back(Mask);
   }
   SDValue VL;
   selectVLOp(Node->getOperand(CurOp++), VL);
@@ -333,8 +329,6 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
   }
 
   Operands.push_back(Chain); // Chain.
-  if (Glue)
-    Operands.push_back(Glue);
 }
 
 void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
@@ -1670,12 +1664,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         return;
       }
 
-      // Mask needs to be copied to V0.
-      SDValue Chain = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
-                                           RISCV::V0, Mask, SDValue());
-      SDValue Glue = Chain.getValue(1);
-      SDValue V0 = CurDAG->getRegister(RISCV::V0, VT);
-
       // Otherwise use
       // vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
       // The result is mask undisturbed.
@@ -1683,7 +1671,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       // the agnostic result can be either undisturbed or all 1.
       SDValue Cmp = SDValue(
           CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
-                                 {MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
+                                 {MaskedOff, Src1, Src2, Mask, VL, SEW}),
           0);
       // vmxor.mm vd, vd, v0 is used to update active value.
       ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
@@ -3426,32 +3414,7 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
   return false;
 }
 
-static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
-  // Check that we're using V0 as a mask register.
-  if (!isa<RegisterSDNode>(MaskOp) ||
-      cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
-    return false;
-
-  // The glued user defines V0.
-  const auto *Glued = GlueOp.getNode();
-
-  if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
-    return false;
-
-  // Check that we're defining V0 as a mask register.
-  if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
-      cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
-    return false;
-
-  // Check the instruction defining V0; it needs to be a VMSET pseudo.
-  SDValue MaskSetter = Glued->getOperand(2);
-
-  // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
-  // from an extract_subvector or insert_subvector.
-  if (MaskSetter->isMachineOpcode() &&
-      MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
-    MaskSetter = MaskSetter->getOperand(0);
-
+static bool usesAllOnesMask(SDValue MaskOp) {
   const auto IsVMSet = [](unsigned Opc) {
     return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
            Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
@@ -3462,14 +3425,12 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
   // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
   // undefined behaviour if it's the wrong bitwidth, so we could choose to
   // assume that it's all-ones? Same applies to its VL.
-  return MaskSetter->isMachineOpcode() &&
-         IsVMSet(MaskSetter.getMachineOpcode());
+  return MaskOp->isMachineOpcode() && IsVMSet(MaskOp.getMachineOpcode());
 }
 
 // Return true if we can make sure mask of N is all-ones mask.
 static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
-  return usesAllOnesMask(N->getOperand(MaskOpIdx),
-                         N->getOperand(N->getNumOperands() - 1));
+  return usesAllOnesMask(N->getOperand(MaskOpIdx));
 }
 
 static bool isImplicitDef(SDValue V) {
@@ -3515,11 +3476,6 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
     Ops.push_back(Op);
   }
 
-  // Transitively apply any node glued to our new node.
-  const auto *Glued = N->getGluedNode();
-  if (auto *TGlued = Glued->getGluedNode())
-    Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
-
   MachineSDNode *Result =
       CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
 
@@ -3584,7 +3540,7 @@ static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
 // The resulting policy is the effective policy the vmerge would have had,
 // i.e. whether or not it's merge operand was implicit-def.
 bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
-  SDValue Merge, False, True, VL, Mask, Glue;
+  SDValue Merge, False, True, VL, Mask;
   // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
   if (IsVMv(N)) {
     Merge = N->getOperand(0);
@@ -3600,11 +3556,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
     True = N->getOperand(2);
     Mask = N->getOperand(3);
     VL = N->getOperand(4);
-    // We always have a glue node for the mask at v0.
-    Glue = N->getOperand(N->getNumOperands() - 1);
   }
-  assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
-  assert(!Glue || Glue.getValueType() == MVT::Glue);
 
   // We require that either merge and false are the same, or that merge
   // is undefined.
@@ -3639,7 +3591,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
 
   // When Mask is not a true mask, this transformation is illegal for some
   // operations whose results are affected by mask, like viota.m.
-  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
+  if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask))
    return false;
 
   // If True has a merge operand then it needs to be the same as vmerge's False,
@@ -3664,7 +3616,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
       return false;
     // FIXME: Support mask agnostic True instruction which would have an
     // undef merge operand.
-    if (Mask && !usesAllOnesMask(Mask, Glue))
+    if (Mask && !usesAllOnesMask(Mask))
       return false;
   }
 
@@ -3691,8 +3643,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
     if (Mask)
      LoopWorklist.push_back(Mask.getNode());
     LoopWorklist.push_back(VL.getNode());
-    if (Glue)
-      LoopWorklist.push_back(Glue.getNode());
     if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
       return false;
   }
@@ -3737,11 +3687,8 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
 
   // From the preconditions we checked above, we know the mask and thus glue
   // for the result node will be taken from True.
-  if (IsMasked) {
+  if (IsMasked)
     Mask = True->getOperand(Info->MaskOpIdx);
-    Glue = True->getOperand(True->getNumOperands() - 1);
-    assert(Glue.getValueType() == MVT::Glue);
-  }
   // If we end up using the vmerge mask the vmerge is actually a vmv.v.v, create
   // an all-ones mask to use.
   else if (IsVMv(N)) {
@@ -3749,13 +3696,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
     unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
     ElementCount EC = N->getValueType(0).getVectorElementCount();
     MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
-
-    SDValue AllOnesMask =
-        SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
-    SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
-                                            RISCV::V0, AllOnesMask, SDValue());
-    Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
-    Glue = MaskCopy.getValue(1);
+    Mask = SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
   }
 
   unsigned MaskedOpc = Info->MaskedPseudo;
@@ -3806,9 +3747,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
   if (HasChainOp)
     Ops.push_back(True.getOperand(TrueChainOpIdx));
 
-  // Add the glue for the CopyToReg of mask->v0.
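
// Sketch (annotation, not part of the patch): the glue operand deleted here
// and below only existed to pin the "CopyToReg $v0, Mask" node to its masked
// user so nothing could be scheduled in between and clobber V0. The combined
// node used to be assembled roughly as
//
//   t0: ch,glue = CopyToReg entry, Register:$v0, Mask
//   tN: ...     = PseudoFOO_MASK Passthru, ..., Register:$v0, VL, SEW, Policy, t0:1
//
// where PseudoFOO_MASK is a placeholder name and t0:1 is the glue result.
// With the mask passed as a plain SDValue operand,
//
//   tN: ...     = PseudoFOO_MASK Passthru, ..., Mask, VL, SEW, Policy
//
// the fold no longer has to copy a glue edge from True, push the glue onto the
// cycle-check worklist, or rebuild a CopyToReg when it synthesises the
// all-ones VMSET mask for the vmv.v.v case.
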
- Ops.push_back(Glue); - MachineSDNode *Result = CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops); Result->setFlags(True->getFlags()); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 70fe7da85be0e..804b9e8ffb2fd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -279,6 +279,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned getUndefInitOpcode(unsigned RegClassID) const override { switch (RegClassID) { + case RISCV::VMV0RegClassID: case RISCV::VRRegClassID: return RISCV::PseudoRVVInitUndefM1; case RISCV::VRM2RegClassID: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index 0b8317925097b..cd5caa47f9718 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -4026,7 +4026,7 @@ class VPatUnaryMask(intrinsic_name#"_mask") (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast( !if(isSEWAware, @@ -4034,7 +4034,7 @@ class VPatUnaryMask; + (mask_type VMV0:$vm), GPR:$vl, log2sew, (XLenVT timm:$policy))>; class VPatUnaryMaskRoundingMode(intrinsic_name#"_mask") (result_type result_reg_class:$merge), (op2_type op2_reg_class:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), VLOpFrag, (XLenVT timm:$policy))), (!cast( @@ -4059,7 +4059,7 @@ class VPatUnaryMaskRoundingMode; @@ -4081,12 +4081,12 @@ class VPatMaskUnaryMask(intrinsic_name#"_mask") (mti.Mask VR:$merge), (mti.Mask VR:$rs2), - (mti.Mask V0), + (mti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_M_"#mti.BX#"_MASK") (mti.Mask VR:$merge), (mti.Mask VR:$rs2), - (mti.Mask V0), GPR:$vl, mti.Log2SEW, TU_MU)>; + (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW, TU_MU)>; class VPatUnaryAnyMask(inst#"_MASK") (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; class VPatBinaryMaskTA(inst#"_MASK") (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>; + (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatBinaryMaskTARoundingMode(inst#"_MASK") (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), GPR:$vl, sew, (XLenVT timm:$policy))>; @@ -4292,13 +4292,13 @@ class VPatBinaryMaskSwapped(inst#"_MASK") (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; class VPatTiedBinaryNoMask(inst#"_MASK_TIED") (result_type result_reg_class:$merge), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>; + (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatTiedBinaryMaskRoundingMode(inst#"_MASK_TIED") (result_type result_reg_class:$merge), (op2_type op2_kind:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), GPR:$vl, sew, (XLenVT timm:$policy))>; @@ -4546,13 +4546,13 @@ class VPatTernaryMask(inst#"_"#kind#"_"#vlmul.MX # "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), GPR:$vl, sew)>; class 
VPatTernaryMaskPolicy(inst#"_"#kind#"_"#vlmul.MX # "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), GPR:$vl, sew, (XLenVT timm:$policy))>; class VPatTernaryMaskPolicyRoundingMode(!if(isSEWAware, @@ -4606,7 +4606,7 @@ class VPatTernaryMaskPolicyRoundingMode; @@ -4626,13 +4626,13 @@ class VPatTernaryMaskTA(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)# "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; class VPatTernaryMaskTARoundingMode(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)# "_MASK") result_reg_class:$rs3, (op1_type op1_reg_class:$rs1), op2_kind:$rs2, - (mask_type V0), + (mask_type VMV0:$vm), (XLenVT timm:$round), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; @@ -4670,9 +4670,9 @@ multiclass VPatUnaryS_M(inst#"_M_"#mti.BX) $rs1, GPR:$vl, mti.Log2SEW)>; def : Pat<(XLenVT (!cast(intrinsic_name # "_mask") - (mti.Mask VR:$rs1), (mti.Mask V0), VLOpFrag)), + (mti.Mask VR:$rs1), (mti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_M_"#mti.BX#"_MASK") $rs1, - (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; + (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW)>; } } @@ -4760,9 +4760,9 @@ multiclass VPatNullaryV { vti.RegClass:$merge, GPR:$vl, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (!cast(intrinsic # "_mask") (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction#"_V_" # vti.LMul.MX # "_MASK") - vti.RegClass:$merge, (vti.Mask V0), + vti.RegClass:$merge, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -4862,13 +4862,13 @@ multiclass VPatBinaryCarryInTAIL(inst#"_"#kind#"_"#vlmul.MX) (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; } multiclass VPatBinaryCarryIn(intrinsic) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), + (mask_type VMV0:$vm), VLOpFrag)), (!cast(inst#"_"#kind#"_"#vlmul.MX) (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), - (mask_type V0), GPR:$vl, sew)>; + (mask_type VMV0:$vm), GPR:$vl, sew)>; } multiclass VPatBinaryMaskOut; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } } @@ -6277,14 +6277,14 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; @@ -6302,14 +6302,14 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast("PseudoVADD_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, (NegImm simm5_plus1:$rs2), - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; @@ -6964,14 +6964,14 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (XLenVT 1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT 
timm:$policy))), (!cast("PseudoVADD_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; @@ -7309,9 +7309,9 @@ foreach fvti = AllFloatVectors in { def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$merge), (fvti.Vector fvti.RegClass:$rs2), (fvti.Scalar (fpimm0)), - (fvti.Mask V0), VLOpFrag)), + (fvti.Mask VMV0:$vm), VLOpFrag)), (instr fvti.RegClass:$merge, fvti.RegClass:$rs2, 0, - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 82ee4b0cbce90..3397d5516331a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -980,16 +980,16 @@ foreach vtiToWti = AllWidenableIntVectors in { (!cast("PseudoVWADDU_VV_"#vti.LMul.MX) (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(shl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag)), + def : Pat<(shl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADD_VV_"#vti.LMul.MX#"_MASK") (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(shl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(shl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADDU_VV_"#vti.LMul.MX#"_MASK") (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1133,24 +1133,24 @@ defm : VPatWidenMulAddSDNode_VX; // 11.15. 
Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1, + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), vti.RegClass:$rs1, vti.RegClass:$rs2)), (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0), + vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask VMV0:$vm), vti.AVL, vti.Log2SEW)>; - def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1), - vti.RegClass:$rs2)), + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1), + vti.RegClass:$rs2)), (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; + vti.RegClass:$rs2, GPR:$rs1, (vti.Mask VMV0:$vm), vti.AVL, vti.Log2SEW)>; - def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1), + def : Pat<(vti.Vector (vselect (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1), vti.RegClass:$rs2)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), - vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>; + vti.RegClass:$rs2, simm5:$rs1, (vti.Mask VMV0:$vm), vti.AVL, vti.Log2SEW)>; } } @@ -1395,30 +1395,30 @@ defm : VPatFPSetCCSDNode_VV_VF_FV; foreach fvti = AllFloatVectors in { defvar ivti = GetIntVTypeInfo.Vti; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1, + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1, fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2)), (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), - fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; } let Predicates = GetVTypePredicates.Predicates in - def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + def : Pat<(fvti.Vector (vselect (fvti.Mask VMV0:$vm), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2)), (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), - (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + (fvti.Mask VMV0:$vm), fvti.AVL, fvti.Log2SEW)>; } // 13.17. 
Vector Single-Width Floating-Point/Integer Type-Convert Instructions diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index f2d97ba147254..42fee1aaefb52 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -622,7 +622,7 @@ class VPatBinaryVL_V( !if(isSEWAware, @@ -631,7 +631,7 @@ class VPatBinaryVL_V; + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; class VPatBinaryVL_V_RM( !if(isSEWAware, @@ -659,7 +659,7 @@ class VPatBinaryVL_V_RM(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK_TIED") result_reg_class:$rs1, op2_reg_class:$rs2, - (mask_type V0), GPR:$vl, sew, TU_MU)>; + (mask_type VMV0:$vm), GPR:$vl, sew, TU_MU)>; multiclass VPatTiedBinaryNoMaskVL_V_RM( !if(isSEWAware, @@ -791,7 +791,7 @@ class VPatBinaryVL_XI; + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; multiclass VPatBinaryVL_VV_VX vtilist = AllIntegerVectors, @@ -906,7 +906,7 @@ class VPatBinaryVL_VF( !if(isSEWAware, @@ -915,7 +915,7 @@ class VPatBinaryVL_VF; + (mask_type VMV0:$vm), GPR:$vl, log2sew, TAIL_AGNOSTIC)>; class VPatBinaryVL_VF_RM( !if(isSEWAware, @@ -941,7 +941,7 @@ class VPatBinaryVL_VF_RM( !if(isSEWAware, @@ -994,7 +994,7 @@ multiclass VPatBinaryFPVL_R_VF; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1005,7 +1005,7 @@ multiclass VPatBinaryFPVL_R_VF_RM( !if(isSEWAware, @@ -1013,7 +1013,7 @@ multiclass VPatBinaryFPVL_R_VF_RM(instruction_name#"_VV_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } // Inherits from VPatIntegerSetCCVL_VV and adds a pattern with operands swapped. @@ -1042,11 +1042,11 @@ multiclass VPatIntegerSetCCVL_VV_Swappable(instruction_name#"_VV_"#vti.LMul.MX#"_MASK") VR:$merge, vti.RegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } multiclass VPatIntegerSetCCVL_VX_Swappable; + GPR:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)), (vti.Vector vti.RegClass:$rs1), invcc, VR:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (instruction_masked VR:$merge, vti.RegClass:$rs1, - GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + GPR:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } multiclass VPatIntegerSetCCVL_VI_Swappable; // FIXME: Can do some canonicalization to remove these patterns. def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2), (vti.Vector vti.RegClass:$rs1), invcc, VR:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (instruction_masked VR:$merge, vti.RegClass:$rs1, - simm5:$rs2, (vti.Mask V0), GPR:$vl, + simm5:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } @@ -1099,20 +1099,20 @@ multiclass VPatIntegerSetCCVL_VIPlus1_Swappable; // FIXME: Can do some canonicalization to remove these patterns. 
def : Pat<(vti.Mask (riscv_setcc_vl (splatpat_kind simm5:$rs2), (vti.Vector vti.RegClass:$rs1), invcc, VR:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (instruction_masked VR:$merge, vti.RegClass:$rs1, - (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl, + (DecImm simm5:$rs2), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } @@ -1125,31 +1125,31 @@ multiclass VPatFPSetCCVL_VV_VF_FV(inst_name#"_VV_"#fvti.LMul.MX#"_MASK") VR:$merge, fvti.RegClass:$rs1, - fvti.RegClass:$rs2, (fvti.Mask V0), + fvti.RegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1), (SplatFPOp fvti.ScalarRegClass:$rs2), cc, VR:$merge, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") VR:$merge, fvti.RegClass:$rs1, - fvti.ScalarRegClass:$rs2, (fvti.Mask V0), + fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; def : Pat<(fvti.Mask (vop (SplatFPOp fvti.ScalarRegClass:$rs2), (fvti.Vector fvti.RegClass:$rs1), cc, VR:$merge, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK") VR:$merge, fvti.RegClass:$rs1, - fvti.ScalarRegClass:$rs2, (fvti.Mask V0), + fvti.ScalarRegClass:$rs2, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } } @@ -1163,11 +1163,11 @@ multiclass VPatExtendVL_V.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (vop (fti.Vector fti.RegClass:$rs2), - (fti.Mask V0), VLOpFrag)), + (fti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst_name#"_"#suffix#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fti.RegClass:$rs2, - (fti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (fti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1179,11 +1179,11 @@ multiclass VPatConvertFP2IVL_V { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), GPR:$vl, ivti.Log2SEW, TA_MA)>; + (fvti.Mask VMV0:$vm), GPR:$vl, ivti.Log2SEW, TA_MA)>; } } @@ -1193,11 +1193,11 @@ multiclass VPatConvertFP2IVL_V_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1212,11 +1212,11 @@ multiclass VPatConvertFP2I_RM_VL_V.Predicates, GetVTypePredicates.Predicates) in def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), (XLenVT timm:$frm), + (fvti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_MASK") (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW, + (fvti.Mask VMV0:$vm), timm:$frm, GPR:$vl, ivti.Log2SEW, TA_MA)>; } } @@ -1227,11 +1227,11 @@ multiclass VPatConvertI2FPVL_V_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask V0), + (ivti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - (ivti.Mask V0), + (ivti.Mask 
VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1245,11 +1245,11 @@ multiclass VPatConvertI2FP_RM_VL_V { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask V0), (XLenVT timm:$frm), + (ivti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - (ivti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; + (ivti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1262,11 +1262,11 @@ multiclass VPatWConvertFP2IVL_V let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1277,11 +1277,11 @@ multiclass VPatWConvertFP2IVL_V_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1297,11 +1297,11 @@ multiclass VPatWConvertFP2I_RM_VL_V { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), (XLenVT timm:$frm), + (fvti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_MASK") (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; + (fvti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1313,11 +1313,11 @@ multiclass VPatWConvertI2FPVL_V.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1), - (ivti.Mask V0), + (ivti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#ivti.LMul.MX#"_E"#ivti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1, - (ivti.Mask V0), + (ivti.Mask VMV0:$vm), GPR:$vl, ivti.Log2SEW, TA_MA)>; } } @@ -1334,11 +1334,11 @@ multiclass VPatNConvertFP2IVL_W.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (fwti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1352,11 +1352,11 @@ multiclass VPatNConvertFP2IVL_W_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1371,11 +1371,11 @@ multiclass VPatNConvertFP2I_RM_VL_W { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in 
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), (XLenVT timm:$frm), + (fwti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), timm:$frm, GPR:$vl, vti.Log2SEW, TA_MA)>; + (fwti.Mask VMV0:$vm), timm:$frm, GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1387,11 +1387,11 @@ multiclass VPatNConvertI2FPVL_W_RM.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), - (iwti.Mask V0), + (iwti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, - (iwti.Mask V0), + (iwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1406,11 +1406,11 @@ multiclass VPatNConvertI2FP_RM_VL_W { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1), - (iwti.Mask V0), (XLenVT timm:$frm), + (iwti.Mask VMV0:$vm), (XLenVT timm:$frm), VLOpFrag)), (!cast(instruction_name#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1, - (iwti.Mask V0), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; + (iwti.Mask VMV0:$vm), timm:$frm, GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -1420,13 +1420,13 @@ multiclass VPatReductionVL { let Predicates = GetVTypePredicates.Predicates in { def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2, - (vti.Mask V0), VLOpFrag, + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti_m1.Vector VR:$rs2), - (vti.Mask V0), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } } @@ -1437,13 +1437,13 @@ multiclass VPatReductionVL_RM let Predicates = GetVTypePredicates.Predicates in { def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2, - (vti.Mask V0), VLOpFrag, + (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), (vti_m1.Vector VR:$rs2), - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1502,11 +1502,11 @@ multiclass VPatWidenReductionVL.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -1521,11 +1521,11 @@ multiclass VPatWidenReductionVL_RM.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), 
(vti.Mask V0), + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1544,11 +1544,11 @@ multiclass VPatWidenReductionVL_Ext_VL.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -1563,11 +1563,11 @@ multiclass VPatWidenReductionVL_Ext_VL_RM.Predicates) in { def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge), (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)), - VR:$rs2, (vti.Mask V0), VLOpFrag, + VR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK") (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), - (wti_m1.Vector VR:$rs2), (vti.Mask V0), + (wti_m1.Vector VR:$rs2), (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1693,10 +1693,10 @@ multiclass VPatNarrowShiftExtVL_WV(instruction_name#"_WV_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -1739,7 +1739,7 @@ multiclass VPatMultiplyAccVL_VV_VX { foreach vti = AllIntegerVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1747,8 +1747,8 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1756,8 +1756,8 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1765,8 +1765,8 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vmerge_vl 
(vti.Mask VMV0:$vm), (vti.Vector (op vti.RegClass:$rd, (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, srcvalue, (vti.Mask true_mask), VLOpFrag), @@ -1774,7 +1774,7 @@ multiclass VPatMultiplyAccVL_VV_VX { vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VX_"# suffix #"_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } } @@ -1788,17 +1788,17 @@ multiclass VPatWidenMultiplyAddVL_VV_VX { def : Pat<(vwmacc_op (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), (wti.Vector wti.RegClass:$rd), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instr_name#"_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vwmacc_op (SplatPat XLenVT:$rs1), (vti.Vector vti.RegClass:$rs2), (wti.Vector wti.RegClass:$rd), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instr_name#"_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -1831,19 +1831,19 @@ multiclass VPatFPMulAddVL_VV_VF defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd, - vti.RegClass:$rs2, (vti.Mask V0), + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rd, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } } @@ -1853,11 +1853,11 @@ multiclass VPatFPMulAddVL_VV_VF_RM.Predicates in { def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd, - vti.RegClass:$rs2, (vti.Mask V0), + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1865,11 +1865,11 @@ multiclass VPatFPMulAddVL_VV_VF_RM(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1882,34 +1882,34 @@ multiclass VPatFPMulAccVL_VV_VF { foreach vti = AllFloatVectors in { defvar suffix = vti.LMul.MX; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - 
(vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } } @@ -1918,46 +1918,46 @@ multiclass VPatFPMulAccVL_VV_VF_RM { foreach vti = AllFloatVectors in { defvar suffix = vti.LMul.MX # "_E" # vti.SEW; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_VV_"# suffix #"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask V0), + def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, vti.RegClass:$rd, (vti.Mask true_mask), 
VLOpFrag)), vti.RegClass:$rd, undef, VLOpFrag), (!cast(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -1974,18 +1974,18 @@ multiclass VPatWidenFPMulAccVL_VV_VF { GetVTypePredicates.Predicates) in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_VV_"#vti.LMul.MX #"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } } @@ -1999,22 +1999,22 @@ multiclass VPatWidenFPMulAccVL_VV_VF_RM { GetVTypePredicates.Predicates) in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_VV_"#suffix#"_MASK") wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask V0), + (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), VLOpFrag), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#suffix#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -2028,20 +2028,20 @@ multiclass VPatSlideVL_VX_VI { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), - uimm5:$rs2, (vti.Mask V0), + uimm5:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, uimm5:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask V0), + GPR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag, (XLenVT timm:$policy))), (!cast(instruction_name#"_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>; } } @@ -2052,10 +2052,10 @@ multiclass VPatSlide1VL_VX { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3), (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask V0), VLOpFrag)), + GPR:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$rs3, vti.RegClass:$rs1, 
GPR:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; } } } @@ -2065,10 +2065,10 @@ multiclass VPatSlide1VL_VF { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vti.Vector (vop (vti.Vector vti.RegClass:$rs3), (vti.Vector vti.RegClass:$rs1), - vti.Scalar:$rs2, (vti.Mask V0), VLOpFrag)), + vti.Scalar:$rs2, (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_MASK") vti.RegClass:$rs3, vti.RegClass:$rs1, vti.Scalar:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; } } } @@ -2078,16 +2078,16 @@ multiclass VPatAVGADDVL_VV_VX_RM { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + vti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vop (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatPat (XLenVT GPR:$rs2))), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + vti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } } @@ -2107,16 +2107,16 @@ foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))), (vti.Vector vti.RegClass:$rs1), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + vti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + vti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, simm5:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2134,22 +2134,22 @@ foreach vtiToWti = AllWidenableIntVectors in { GetVTypePredicates.Predicates) in { def : Pat<(riscv_shl_vl (wti.Vector (riscv_sext_vl_oneuse (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, VLOpFrag)), - wti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + wti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWADD_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, VLOpFrag)), - wti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + wti.RegClass:$merge, (vti.Mask VMV0:$vm), VLOpFrag), 
(!cast("PseudoVWADDU_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2210,11 +2210,11 @@ foreach vtiTowti = AllWidenableIntVectors in { let Predicates = !listconcat(GetVTypePredicates.Predicates, GetVTypePredicates.Predicates) in def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVNSRL_WI_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } // 11.8. Vector Integer Comparison Instructions @@ -2301,41 +2301,41 @@ foreach vtiTowti = AllWidenableIntVectors in { def : Pat<(riscv_vwmaccsu_vl (vti.Vector vti.RegClass:$rs1), (SplatPat XLenVT:$rs2), (wti.Vector wti.RegClass:$rd), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWMACCUS_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$rd, vti.ScalarRegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // 11.15. Vector Integer Merge Instructions foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask VMV0:$vm), vti.RegClass:$rs1, vti.RegClass:$rs2, vti.RegClass:$merge, VLOpFrag)), (!cast("PseudoVMERGE_VVM_"#vti.LMul.MX) vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask VMV0:$vm), (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, vti.RegClass:$merge, VLOpFrag)), (!cast("PseudoVMERGE_VXM_"#vti.LMul.MX) vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; - def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vmerge_vl (vti.Mask VMV0:$vm), (SplatPat_simm5 simm5:$rs1), vti.RegClass:$rs2, vti.RegClass:$merge, VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#vti.LMul.MX) vti.RegClass:$merge, vti.RegClass:$rs2, simm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW)>; } } @@ -2385,13 +2385,13 @@ class VPatTruncSatClipMaxMinBase(inst#"_WI_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, - (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; class VPatTruncSatClipUMin("PseudoVNCLIPU_WI_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs1, 0, - (vti.Mask V0), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), 0, GPR:$vl, vti.Log2SEW, TA_MA)>; multiclass VPatTruncSatClipMaxMin { @@ -2492,39 +2492,39 @@ defm : VPatFPSetCCVL_VV_VF_FV.Predicates in { // 13.8. 
Vector Floating-Point Square-Root Instruction - def : Pat<(any_riscv_fsqrt_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask V0), + def : Pat<(any_riscv_fsqrt_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSQRT_V_"# vti.LMul.MX # "_E" # vti.SEW # "_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - (vti.Mask V0), + (vti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, vti.Log2SEW, TA_MA)>; // 13.12. Vector Floating-Point Sign-Injection Instructions - def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0), + def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, - vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; // Handle fneg with VFSGNJN using the same input for both operands. - def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0), + def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs, - vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2), vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, - vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), @@ -2541,26 +2541,26 @@ foreach vti = AllFloatVectors in { def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (SplatFPOp vti.ScalarRegClass:$rs2), vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, - vti.ScalarRegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW, + vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; // Rounding without exception to implement nearbyint. def : Pat<(any_riscv_vfround_noexcept_vl (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; // 14.14. Vector Floating-Point Classify Instruction def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVFCLASS_V_"# vti.LMul.MX #"_MASK") (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; } } @@ -2570,37 +2570,37 @@ foreach fvti = AllFloatVectors in { // 13.15. 
Vector Floating-Point Merge Instruction defvar ivti = GetIntVTypeInfo.Vti; let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.RegClass:$merge, VLOpFrag)), (!cast("PseudoVMERGE_VVM_"#fvti.LMul.MX) - fvti.RegClass:$merge, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), + fvti.RegClass:$merge, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), fvti.RegClass:$rs2, fvti.RegClass:$merge, VLOpFrag)), (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX) - fvti.RegClass:$merge, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0), + fvti.RegClass:$merge, fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2, fvti.RegClass:$merge, VLOpFrag)), (!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) - fvti.RegClass:$merge, fvti.RegClass:$rs2, 0, (fvti.Mask V0), + fvti.RegClass:$merge, fvti.RegClass:$rs2, 0, (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), + def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask VMV0:$vm), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2, fvti.RegClass:$merge, @@ -2608,7 +2608,7 @@ foreach fvti = AllFloatVectors in { (!cast("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX) fvti.RegClass:$merge, fvti.RegClass:$rs2, (fvti.Scalar fvti.ScalarRegClass:$rs1), - (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW)>; } } @@ -2670,11 +2670,11 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates)) in def : Pat<(fwti.Vector (any_riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1), - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, - (fvti.Mask V0), + (fvti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } @@ -2702,10 +2702,10 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates)) in { def : Pat<(fvti.Vector (any_riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), VLOpFrag)), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), + (fwti.Mask VMV0:$vm), // Value to indicate no rounding mode change in // RISCVInsertReadWriteCSR FRM_DYN, @@ -2715,10 +2715,10 @@ foreach fvtiToFWti = AllWidenableFloatVectors in { GetVTypePredicates.Predicates) in def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1), - (fwti.Mask V0), VLOpFrag)), + (fwti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK") (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1, - (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>; + (fwti.Mask VMV0:$vm), GPR:$vl, fvti.Log2SEW, TA_MA)>; } } @@ -2822,20 +2822,20 @@ foreach mti = AllMasks in { VLOpFrag)), (!cast("PseudoVCPOP_M_" # mti.BX) VR:$rs2, GPR:$vl, mti.Log2SEW)>; - def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), 
(mti.Mask V0), + def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVCPOP_M_" # mti.BX # "_MASK") - VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; + VR:$rs2, (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW)>; // 15.3 vfirst find-first-set mask bit def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask true_mask), VLOpFrag)), (!cast("PseudoVFIRST_M_" # mti.BX) VR:$rs2, GPR:$vl, mti.Log2SEW)>; - def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask V0), + def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVFIRST_M_" # mti.BX # "_MASK") - VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>; + VR:$rs2, (mti.Mask VMV0:$vm), GPR:$vl, mti.Log2SEW)>; } } @@ -2858,26 +2858,26 @@ foreach vti = AllIntegerVectors in { def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2, vti.RegClass:$rs1, vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_E"# vti.SEW#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm, vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } // emul = lmul * 16 / sew @@ -2893,11 +2893,11 @@ foreach vti = AllIntegerVectors in { (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, (ivti.Vector ivti.RegClass:$rs1), vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -2931,27 +2931,27 @@ foreach vti = AllFloatVectors in { (riscv_vrgather_vv_vl vti.RegClass:$rs2, (ivti.Vector vti.RegClass:$rs1), vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VV_"# vti.LMul.MX#"_E"# vti.SEW#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1, vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, uimm5:$imm, vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, 
vti.Log2SEW, TAIL_AGNOSTIC)>; } defvar vlmul = vti.LMul; @@ -2967,11 +2967,11 @@ foreach vti = AllFloatVectors in { (riscv_vrgatherei16_vv_vl vti.RegClass:$rs2, (ivti.Vector ivti.RegClass:$rs1), vti.RegClass:$merge, - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(inst#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, ivti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -3008,10 +3008,10 @@ def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, [ foreach vti = AllIntegerVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask V0), + def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVID_V_"#vti.LMul.MX#"_MASK") - (vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW, + (vti.Vector (IMPLICIT_DEF)), (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index aac7dc444a2de..16f727900a448 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -537,12 +537,12 @@ multiclass VPatUnaryVL_V.Predicates) in { def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast(instruction_name#"_V_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -557,17 +557,17 @@ foreach vti = AllIntegerVectors in { (vti.Vector vti.RegClass:$rs1), (riscv_splat_vector -1), (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVANDN_VV_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -576,13 +576,13 @@ foreach vti = AllIntegerVectors in { (not vti.ScalarRegClass:$rs1)), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), + (vti.Mask VMV0:$vm), VLOpFrag)), (!cast("PseudoVANDN_VX_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, - (vti.Mask V0), + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; @@ -604,12 +604,12 @@ foreach vti = AllIntegerVectors in { def : Pat<(riscv_rotl_vl vti.RegClass:$rs2, (vti.Vector (SplatPat_uimm6 uimm6:$rs1)), (vti.Vector vti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVROR_VI_"#vti.LMul.MX#"_MASK") vti.RegClass:$merge, vti.RegClass:$rs2, (!cast("InvRot" # vti.SEW # "Imm") uimm6:$rs1), - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } defm : VPatBinaryVL_VV_VX_VI; @@ -624,90 +624,90 @@ foreach vtiToWti = AllWidenableIntVectors in { (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : 
Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (riscv_ext_vl_oneuse (vti.Vector vti.RegClass:$rs1), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse (vti.Vector vti.RegClass:$rs2), - (vti.Mask V0), VLOpFrag)), + (vti.Mask VMV0:$vm), VLOpFrag)), (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_vwsll_vl (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_vwsll_vl (vti.Vector vti.RegClass:$rs2), (vti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), (!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_vwsll_vl (vti.Vector vti.RegClass:$rs2), (vti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$merge), - (vti.Mask V0), VLOpFrag), + (vti.Mask VMV0:$vm), VLOpFrag), 
(!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, - (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; } } @@ -851,12 +851,12 @@ multiclass VPatBinaryV_VI_VROL; } } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h index 7e04e9154b524..146ac5700db04 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h @@ -132,6 +132,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const TargetRegisterClass * getLargestSuperClass(const TargetRegisterClass *RC) const override { + if (RISCV::VMV0RegClass.hasSubClassEq(RC)) + return &RISCV::VMV0RegClass; if (RISCV::VRM8RegClass.hasSubClassEq(RC)) return &RISCV::VRM8RegClass; if (RISCV::VRM4RegClass.hasSubClassEq(RC)) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir index 42bf321228705..f8061462c6220 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/select.mir @@ -10,20 +10,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv1i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv1i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -41,20 +39,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv4i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv4i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = 
PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -72,20 +68,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv16i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8m4 ; ; RV64I-LABEL: name: select_nxv16i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8m4 %0:vrb() = G_IMPLICIT_DEF @@ -103,20 +97,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv64i8 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv64i8 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF4_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -134,20 +126,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv2i16 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], 
[[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv2i16 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M1_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_M1 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -165,20 +155,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv8i16 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8m4 ; ; RV64I-LABEL: name: select_nxv8i16 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm4nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M4_:%[0-9]+]]:vrm4nov0 = PseudoVMERGE_VVM_M4 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8m4 = COPY [[PseudoVMERGE_VVM_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8m4 %0:vrb() = G_IMPLICIT_DEF @@ -196,20 +184,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv32i16 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: select_nxv32i16 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrnov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_MF2_:%[0-9]+]]:vrnov0 = PseudoVMERGE_VVM_MF2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMERGE_VVM_MF2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb() = G_IMPLICIT_DEF @@ -227,20 +213,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv2i32 - ; RV32I: 
[[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8m2 ; ; RV64I-LABEL: name: select_nxv2i32 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8m2 %0:vrb() = G_IMPLICIT_DEF @@ -258,20 +242,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv8i32 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8m8 ; ; RV64I-LABEL: name: select_nxv8i32 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8m8 %0:vrb() = G_IMPLICIT_DEF @@ -289,20 +271,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv1i64 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8m2 ; ; RV64I-LABEL: name: select_nxv1i64 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm2 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm2nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; 
RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M2_:%[0-9]+]]:vrm2nov0 = PseudoVMERGE_VVM_M2 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8m2 = COPY [[PseudoVMERGE_VVM_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8m2 %0:vrb() = G_IMPLICIT_DEF @@ -320,20 +300,18 @@ tracksRegLiveness: true body: | bb.0.entry: ; RV32I-LABEL: name: select_nxv4i64 - ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV32I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV32I-NEXT: $v0 = COPY [[DEF]] - ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV32I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8m8 ; ; RV64I-LABEL: name: select_nxv4i64 - ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF1:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; RV64I-NEXT: [[DEF2:%[0-9]+]]:vrm8nov0 = IMPLICIT_DEF - ; RV64I-NEXT: $v0 = COPY [[DEF]] - ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], $v0, -1, 6 /* e64 */ + ; RV64I-NEXT: [[PseudoVMERGE_VVM_M8_:%[0-9]+]]:vrm8nov0 = PseudoVMERGE_VVM_M8 [[DEF2]], [[DEF1]], [[DEF1]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8m8 = COPY [[PseudoVMERGE_VVM_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8m8 %0:vrb() = G_IMPLICIT_DEF diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 83a4f63add337..075678a6655b5 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -24,31 +24,31 @@ define void @_Z3foov() { ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_49) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_49) ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_48) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_48) -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_46) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_46) -; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_45) ; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45) -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v14, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: vs2r.v v14, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: vs2r.v v16, 
(a0) # Unknown-size Folded Spill ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index ea8feef332984..e006e748320e5 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -108,15 +108,15 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV64: # %bb.0: ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vid.v v16 +; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vid.v v24 ; RV64-NEXT: li a1, -1 -; RV64-NEXT: vmadd.vx v16, a1, v24 +; RV64-NEXT: vmadd.vx v24, a1, v16 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vmsne.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV64-NEXT: vmerge.vvm v8, v8, v24, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: sub a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/abd.ll b/llvm/test/CodeGen/RISCV/rvv/abd.ll index ddbfbd0b59fa4..affbae9c35cdb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abd.ll @@ -26,13 +26,13 @@ define @sabd_b_promoted_ops( %a, %a to %b.sext = sext %b to @@ -158,13 +158,13 @@ define @uabd_b_promoted_ops( %a, %a to %b.zext = zext %b to diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll index eb74e2d302f1a..698ecbf225a92 100644 --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -561,50 +561,26 @@ declare @llvm.vp.abs.nxv16i64(, i1 immarg define @vp_abs_nxv16i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_abs_nxv16i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmax.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t +; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB46_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t -; CHECK-NEXT: vmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli 
a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vrsub.vi v24, v8, 0, v0.t +; CHECK-NEXT: vmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.abs.nxv16i64( %va, i1 false, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index d063d0565f1ea..28de935892a2b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2016,29 +2016,29 @@ define @vp_bitreverse_nxv4i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64( %va, @llvm.vp.bitreverse.nxv64i16(, @vp_bitreverse_nxv64i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_bitreverse_nxv64i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a1, a2, 1 +; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a1 -; CHECK-NEXT: slli a2, a2, 2 -; CHECK-NEXT: sub a1, a0, a2 -; CHECK-NEXT: sltu a3, a0, a1 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v0, v0, a2 +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub a2, a0, a1 +; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vsrl.vi v8, v16, 8, v0.t +; CHECK-NEXT: and a2, a3, a2 +; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t -; CHECK-NEXT: vor.vv v16, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v8, v16, 4, v0.t -; CHECK-NEXT: lui a1, 1 -; CHECK-NEXT: addi a1, a1, -241 -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vor.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t +; CHECK-NEXT: lui a2, 1 +; CHECK-NEXT: addi a2, a2, -241 +; CHECK-NEXT: vand.vx v24, v24, a2, v0.t +; CHECK-NEXT: vand.vx v16, v16, a2, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t -; CHECK-NEXT: vor.vv v16, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v8, v16, 2, v0.t +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsrl.vi v24, v16, 2, v0.t ; CHECK-NEXT: lui a3, 3 ; CHECK-NEXT: addi a3, a3, 819 -; CHECK-NEXT: vand.vx v8, v8, a3, v0.t +; CHECK-NEXT: vand.vx v24, v24, a3, v0.t ; CHECK-NEXT: vand.vx v16, v16, a3, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t -; CHECK-NEXT: vor.vv v16, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v8, v16, 1, v0.t +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsrl.vi v24, v16, 1, v0.t ; CHECK-NEXT: lui a4, 5 ; CHECK-NEXT: addi a4, a4, 1365 -; CHECK-NEXT: vand.vx v8, v8, a4, v0.t +; CHECK-NEXT: vand.vx v24, v24, a4, v0.t ; CHECK-NEXT: vand.vx v16, v16, a4, v0.t ; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: addi a5, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a0, a2, 
.LBB46_2 +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t +; CHECK-NEXT: bltu a0, a1, .LBB46_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB46_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t +; CHECK-NEXT: vor.vv v8, v8, v24, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 4, v0.t +; CHECK-NEXT: vand.vx v24, v24, a2, v0.t +; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t -; CHECK-NEXT: vand.vx v16, v16, a3, v0.t +; CHECK-NEXT: vor.vv v8, v24, v8, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 2, v0.t +; CHECK-NEXT: vand.vx v24, v24, a3, v0.t ; CHECK-NEXT: vand.vx v8, v8, a3, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t -; CHECK-NEXT: vand.vx v16, v16, a4, v0.t +; CHECK-NEXT: vor.vv v8, v24, v8, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 1, v0.t +; CHECK-NEXT: vand.vx v24, v24, a4, v0.t ; CHECK-NEXT: vand.vx v8, v8, a4, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vor.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 1 ; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv1r.v v25, v0 ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-ZVBB-NEXT: slli a1, a1, 2 ; CHECK-ZVBB-NEXT: sub a2, a0, a1 @@ -3108,7 +3092,7 @@ define @vp_bitreverse_nxv64i16( %va, @llvm.vp.bitreverse.nxv64i16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 2e78d9f0d3790..7234601cdcf80 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -864,29 +864,29 @@ define @vp_bswap_nxv4i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64( %va, @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64( %va, @llvm.vp.bswap.nxv64i16(, @vp_bswap_nxv64i16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_bswap_nxv64i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, 
ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: sub a2, a0, a1 @@ -1530,39 +1527,26 @@ define @vp_bswap_nxv64i16( %va, @vp_bswap_nxv64i16( %va, @llvm.vp.bswap.nxv64i16( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index 6dcc3b8e3e54a..d396d6af3e1d3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -682,18 +682,10 @@ declare @llvm.vp.ceil.nxv16f64(, @vp_ceil_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -702,49 +694,32 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call 
@llvm.vp.ceil.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll index 113154c0f9855..156f20167e275 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll @@ -19,18 +19,18 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV32-LABEL: constant_folding_crash: ; RV32: # %bb.0: # %entry ; RV32-NEXT: lw a0, 8(a0) -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: seqz a0, a0 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV32-NEXT: vmv.v.x v11, a0 -; RV32-NEXT: vmsne.vi v0, v11, 0 +; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vmsne.vi v0, v10, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmv1r.v v0, v11 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: vrgather.vi v9, v8, 0 ; RV32-NEXT: vmsne.vi v0, v9, 0 @@ -42,18 +42,18 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV64-LABEL: constant_folding_crash: ; RV64: # %bb.0: # %entry ; RV64-NEXT: ld a0, 8(a0) -; RV64-NEXT: vmv1r.v v12, v0 +; RV64-NEXT: vmv1r.v v13, v0 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; RV64-NEXT: vmv.v.x v13, a0 -; RV64-NEXT: vmsne.vi v0, v13, 0 +; RV64-NEXT: vmv.v.x v12, a0 +; RV64-NEXT: vmsne.vi v0, v12, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmv1r.v v0, v13 ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: vrgather.vi v9, v8, 0 ; RV64-NEXT: vmsne.vi v0, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 2a75e5ce7175d..288bd0e4b7ebe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -1235,10 +1235,10 @@ declare @llvm.vp.ctlz.nxv16i64(, i1 immar define @vp_ctlz_nxv16i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_nxv16i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -1270,10 +1270,10 @@ define @vp_ctlz_nxv16i64( %va, @vp_ctlz_nxv16i64( %va, @llvm.vp.ctlz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -2465,10 +2465,10 @@ define @vp_ctlz_zero_undef_nxv8i64_unmasked( @vp_ctlz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -2497,10 +2497,10 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 3 ; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, mf4, ta, 
ma +; CHECK-ZVBB-NEXT: vmv1r.v v25, v0 ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-ZVBB-NEXT: sub a2, a0, a1 ; CHECK-ZVBB-NEXT: sltu a3, a0, a2 @@ -2513,7 +2513,7 @@ define @vp_ctlz_zero_undef_nxv16i64( %va, ; CHECK-ZVBB-NEXT: mv a0, a1 ; CHECK-ZVBB-NEXT: .LBB94_2: ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vmv1r.v v0, v25 ; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.ctlz.nxv16i64( %va, i1 true, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll index 883f68aec1f42..094e5bc14f9b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2018,228 +2018,131 @@ define @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @vp_ctpop_nxv16i64( %va, @llvm.vp.ctpop.nxv16i64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll index d13f4d2dca1ff..c833c76a779eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll @@ -42,9 +42,9 @@ define @cttz_nxv1i8( %va) { ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 ; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv1i8: @@ -59,9 +59,9 @@ define @cttz_nxv1i8( %va) { ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 ; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv1i8: @@ -108,9 +108,9 @@ define @cttz_nxv2i8( %va) { ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 ; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv2i8: @@ -125,9 +125,9 @@ define @cttz_nxv2i8( %va) { ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 ; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv2i8: @@ -174,9 +174,9 @@ define @cttz_nxv4i8( %va) { ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 ; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv4i8: @@ -191,9 +191,9 @@ define @cttz_nxv4i8( %va) { ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 ; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; 
CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv4i8: @@ -240,9 +240,9 @@ define @cttz_nxv8i8( %va) { ; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v10, 0 ; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv8i8: @@ -257,9 +257,9 @@ define @cttz_nxv8i8( %va) { ; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v10, 0 ; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv8i8: @@ -306,9 +306,9 @@ define @cttz_nxv16i8( %va) { ; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-F-NEXT: vnsrl.wi v10, v12, 0 ; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v10, v10, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v10, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v10, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv16i8: @@ -323,9 +323,9 @@ define @cttz_nxv16i8( %va) { ; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-D-NEXT: vnsrl.wi v10, v12, 0 ; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v10, v10, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v10, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v10, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv16i8: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index ef8a6c704a44b..30572c084badf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -2242,19 +2242,20 @@ define @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @vp_cttz_nxv16i64( %va, @llvm.vp.cttz.nxv16i64( %va, i1 false, %m, i32 %evl) @@ -4002,70 +3870,46 @@ define @vp_cttz_zero_undef_nxv8i64_unmasked( @vp_cttz_zero_undef_nxv16i64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vrsub.vi v8, v16, 0, v0.t -; CHECK-NEXT: vand.vv v8, v16, v8, v0.t +; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t +; CHECK-NEXT: vand.vv v16, v16, v24, v0.t ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.xu.v v16, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: li a2, 
52 -; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t +; CHECK-NEXT: vsrl.vx v16, v16, a2, v0.t ; CHECK-NEXT: li a3, 1023 -; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vsub.vx v16, v16, a3, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB94_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB94_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t -; CHECK-NEXT: vand.vv v8, v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vrsub.vi v24, v8, 0, v0.t +; CHECK-NEXT: vand.vv v8, v8, v24, v0.t ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 3 ; CHECK-ZVBB-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv1r.v v25, v0 ; CHECK-ZVBB-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-ZVBB-NEXT: sub a2, a0, a1 ; CHECK-ZVBB-NEXT: sltu a3, a0, a2 @@ -4078,7 +3922,7 @@ define @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-ZVBB-NEXT: mv a0, a1 ; CHECK-ZVBB-NEXT: .LBB94_2: ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 +; CHECK-ZVBB-NEXT: vmv1r.v v0, v25 ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t ; CHECK-ZVBB-NEXT: ret %v = call @llvm.vp.cttz.nxv16i64( %va, i1 true, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index b4ab11cd17175..d74ec69c71968 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -149,14 +149,14 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: vl8r.v v16, (a0) ; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-NEXT: vmseq.vi v8, v8, 0 +; RV32-NEXT: vmseq.vi v24, v8, 0 ; RV32-NEXT: vmseq.vi v0, v16, 0 -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vim v24, v16, 1, v0 -; RV32-NEXT: vs8r.v v24, (a3) +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmerge.vim v16, v8, 1, v0 +; RV32-NEXT: vs8r.v v16, (a3) ; RV32-NEXT: add a2, a3, a2 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v16, 1, v0 +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 @@ -189,14 +189,14 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: vl8r.v v16, (a0) ; RV64-NEXT: add a1, a3, a1 ; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-NEXT: vmseq.vi v8, v8, 0 +; RV64-NEXT: vmseq.vi v24, v8, 0 ; RV64-NEXT: vmseq.vi v0, v16, 0 -; RV64-NEXT: vmv.v.i v16, 0 -; RV64-NEXT: vmerge.vim v24, v16, 1, v0 -; RV64-NEXT: vs8r.v v24, (a3) +; RV64-NEXT: vmv.v.i v8, 0 +; RV64-NEXT: vmerge.vim v16, v8, 1, v0 +; RV64-NEXT: vs8r.v v16, (a3) ; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vmerge.vim v8, 
v16, 1, v0 +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll index 75747a6674b7b..ff5e8fb539a78 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -8,9 +8,9 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -30,9 +30,9 @@ define @ceil_nxv2f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -52,9 +52,9 @@ define @ceil_nxv4f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,9 +74,9 @@ define @ceil_nxv8f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -96,9 +96,9 @@ define @ceil_nxv16f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -118,9 +118,9 @@ define @ceil_nxv32f16( %x) strictfp { ; CHECK-LABEL: ceil_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -250,9 +250,9 @@ define @ceil_nxv1f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -272,9 +272,9 @@ define @ceil_nxv2f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; 
CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -294,9 +294,9 @@ define @ceil_nxv4f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -316,9 +316,9 @@ define @ceil_nxv8f64( %x) strictfp { ; CHECK-LABEL: ceil_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll index 31a9453204457..57e7770879166 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -8,9 +8,9 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-LABEL: floor_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -30,9 +30,9 @@ define @floor_nxv2f16( %x) strictfp { ; CHECK-LABEL: floor_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -52,9 +52,9 @@ define @floor_nxv4f16( %x) strictfp { ; CHECK-LABEL: floor_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,9 +74,9 @@ define @floor_nxv8f16( %x) strictfp { ; CHECK-LABEL: floor_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -96,9 +96,9 @@ define @floor_nxv16f16( %x) strictfp { ; CHECK-LABEL: floor_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -118,9 +118,9 @@ define @floor_nxv32f16( %x) strictfp { ; CHECK-LABEL: floor_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: 
vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -250,9 +250,9 @@ define @floor_nxv1f64( %x) strictfp { ; CHECK-LABEL: floor_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -272,9 +272,9 @@ define @floor_nxv2f64( %x) strictfp { ; CHECK-LABEL: floor_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -294,9 +294,9 @@ define @floor_nxv4f64( %x) strictfp { ; CHECK-LABEL: floor_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -316,9 +316,9 @@ define @floor_nxv8f64( %x) strictfp { ; CHECK-LABEL: floor_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 95a9777671ff8..dc2e011ac66d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1390,29 +1390,29 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %e ; RV32-NEXT: vand.vx v16, v8, a2, v0.t ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3, v0.t -; RV32-NEXT: vor.vv v16, v12, v16, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v12, v0.t +; RV32-NEXT: vand.vv v24, v8, v20, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: vand.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v8, v8, v20, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t -; RV32-NEXT: 
vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -1687,7 +1687,9 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero @@ -1697,24 +1699,26 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -1804,13 +1808,13 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t ; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t @@ -2051,7 +2055,9 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero @@ -2061,24 +2067,26 @@ define <16 x 
i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -2168,13 +2176,13 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t ; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t @@ -2377,87 +2385,63 @@ declare <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16>, <128 x i1>, i32) define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_bitreverse_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vslidedown.vi v24, v0, 8 +; CHECK-NEXT: vslidedown.vi v7, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: .LBB34_2: ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vor.vv v8, v8, v24, v0.t 
+; CHECK-NEXT: vsrl.vi v24, v8, 4, v0.t ; CHECK-NEXT: lui a1, 1 ; CHECK-NEXT: addi a1, a1, -241 -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t +; CHECK-NEXT: vand.vx v24, v24, a1, v0.t ; CHECK-NEXT: vand.vx v8, v8, a1, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vor.vv v8, v24, v8, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 2, v0.t ; CHECK-NEXT: lui a2, 3 ; CHECK-NEXT: addi a2, a2, 819 -; CHECK-NEXT: vand.vx v16, v16, a2, v0.t +; CHECK-NEXT: vand.vx v24, v24, a2, v0.t ; CHECK-NEXT: vand.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vor.vv v8, v24, v8, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 1, v0.t ; CHECK-NEXT: lui a3, 5 ; CHECK-NEXT: addi a3, a3, 1365 -; CHECK-NEXT: vand.vx v16, v16, a3, v0.t +; CHECK-NEXT: vand.vx v24, v24, a3, v0.t ; CHECK-NEXT: vand.vx v8, v8, a3, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vor.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a4, a0, -64 ; CHECK-NEXT: sltu a0, a0, a4 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a4 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vor.vv v16, v16, v24, v0.t +; CHECK-NEXT: vsrl.vi v24, v16, 4, v0.t +; CHECK-NEXT: vand.vx v24, v24, a1, v0.t ; CHECK-NEXT: vand.vx v16, v16, a1, v0.t -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 4, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 2, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 4, v0.t +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsrl.vi v24, v16, 2, v0.t +; CHECK-NEXT: vand.vx v24, v24, a2, v0.t ; CHECK-NEXT: vand.vx v16, v16, a2, v0.t -; CHECK-NEXT: vand.vx v8, v8, a2, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 2, v0.t -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 2, v0.t +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t +; CHECK-NEXT: vsrl.vi v24, v16, 1, v0.t +; CHECK-NEXT: vand.vx v24, v24, a3, v0.t ; CHECK-NEXT: vand.vx v16, v16, a3, v0.t -; CHECK-NEXT: vand.vx v8, v8, a3, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vor.vv v16, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vsll.vi v16, v16, 1, v0.t +; CHECK-NEXT: vor.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <128 x i16> @llvm.vp.bitreverse.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl) ret <128 x i16> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index b772fc23dca03..095d2fef751f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -610,29 +610,29 @@ define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-NEXT: vand.vx v16, v8, a2, v0.t ; RV32-NEXT: li a3, 40 ; RV32-NEXT: vsll.vx v16, v16, a3, v0.t -; RV32-NEXT: vor.vv v16, v12, v16, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v20, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v12, v0.t +; RV32-NEXT: vand.vv v24, v8, v20, v0.t ; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v12, v12, v16, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t ; RV32-NEXT: vand.vx v24, v24, a2, v0.t -; RV32-NEXT: vor.vv v20, v24, v20, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV32-NEXT: vand.vx v24, v24, a4, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vand.vv v8, v8, v20, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -781,7 +781,9 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero @@ -791,24 +793,26 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; 
RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -871,13 +875,13 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t ; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t @@ -1012,7 +1016,9 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v24, v16, 24, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero @@ -1022,24 +1028,26 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsll.vi v24, v24, 8, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t @@ -1102,13 +1110,13 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t ; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t ; RV64-NEXT: vand.vx v16, v16, a3, v0.t -; RV64-NEXT: vor.vv v24, v16, v24, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; 
RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t @@ -1217,51 +1225,27 @@ declare <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16>, <128 x i1>, i32) define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_bswap_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vslidedown.vi v24, v0, 8 +; CHECK-NEXT: vslidedown.vi v7, v0, 8 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t +; CHECK-NEXT: vsrl.vi v24, v8, 8, v0.t ; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vor.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a1, a0, -64 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v16, v8, 8, v0.t -; CHECK-NEXT: vsll.vi v8, v8, 8, v0.t -; CHECK-NEXT: vor.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsrl.vi v24, v16, 8, v0.t +; CHECK-NEXT: vsll.vi v16, v16, 8, v0.t +; CHECK-NEXT: vor.vv v16, v16, v24, v0.t ; CHECK-NEXT: ret %v = call <128 x i16> @llvm.vp.bswap.v128i16(<128 x i16> %va, <128 x i1> %m, i32 %evl) ret <128 x i16> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index af7d7f7ae755b..886e874b9f958 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -126,14 +126,14 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vsub.vv v10, v8, v9 ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: lui a1, %hi(.LCPI4_1) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI4_1) +; CHECK-NEXT: vle32.v v12, (a1) ; CHECK-NEXT: vslide1down.vx v11, v11, a0 -; CHECK-NEXT: lui a0, %hi(.LCPI4_1) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_1) -; CHECK-NEXT: vle32.v v12, (a0) ; CHECK-NEXT: vmulhu.vv v10, v10, v11 ; CHECK-NEXT: vadd.vv v9, v10, v9 -; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsrl.vv v9, v9, v12 +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %e0 = udiv i32 %a, 23 diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index 68b479e3b9b8c..2af0292a6cc13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -719,20 +719,9 @@ declare <32 x double> @llvm.vp.ceil.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -741,46 +730,32 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.ceil.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 2f4539d5038c2..ac14ead7c951c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1986,17 +1986,18 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -2034,112 +2035,55 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; 
RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2148,97 +2092,63 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a0, a0, a3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v 
v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 @@ -2246,57 +2156,46 @@ define <32 x i64> 
@vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_ctlz_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV64-NEXT: lui a2, 349525 ; RV64-NEXT: addiw a2, a2, 1365 ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vsub.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v24, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a4, 61681 ; RV64-NEXT: addiw a4, a4, -241 ; RV64-NEXT: slli a5, a4, 32 @@ -2309,50 +2208,37 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t -; RV64-NEXT: addi a7, sp, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; 
RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vmul.vx v8, v8, a5, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 2, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 8, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 16, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vx v24, v16, a1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vnot.v v16, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vsub.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v24, v16, a3, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vmul.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a6, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -4539,17 +4425,18 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -4587,112 +4474,55 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; 
RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr 
a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4701,97 +4531,63 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a0, a0, a3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v8, v24, 1, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 2, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; 
RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 @@ -4799,57 +4595,46 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; ; RV64-LABEL: vp_ctlz_zero_undef_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB70_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB70_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 2, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 2, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: 
vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 16, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: li a1, 32 -; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a1, v0.t +; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV64-NEXT: lui a2, 349525 ; RV64-NEXT: addiw a2, a2, 1365 ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vsub.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v24, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a4, 61681 ; RV64-NEXT: addiw a4, a4, -241 ; RV64-NEXT: slli a5, a4, 32 @@ -4862,50 +4647,37 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t -; RV64-NEXT: addi a7, sp, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vor.vv v16, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v8, v16, 2, v0.t -; RV64-NEXT: vor.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vmul.vx v8, v8, a5, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 2, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 8, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; 
RV64-NEXT: vsrl.vi v24, v16, 16, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vx v24, v16, a1, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vnot.v v16, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a2, v0.t +; RV64-NEXT: vsub.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v24, v16, a3, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vmul.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a6, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index 0b6d8b33394d5..ae34d056251a6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1528,72 +1528,50 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a2, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a2, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v24, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t -; RV32-NEXT: vadd.vv v16, v16, v8, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vlse64.v v8, (a2), zero ; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t +; RV32-NEXT: vmul.vv v24, v24, v8, v0.t ; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: vsrl.vx v8, v24, a1, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 @@ -1610,25 +1588,25 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v24, v8, v0.t -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsub.vv v24, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vand.vv v8, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb @@ -1655,43 +1633,32 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; ; RV64-LABEL: vp_ctpop_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: li a2, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: vslidedown.vi v7, v0, 2 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: 
addiw a1, a1, 1365 ; RV64-NEXT: slli a2, a1, 32 ; RV64-NEXT: add a1, a1, a2 -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vsub.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a2, 209715 ; RV64-NEXT: addiw a2, a2, 819 ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v16, v8, a2, v0.t +; RV64-NEXT: vand.vx v24, v8, a2, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a2, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a3, 61681 ; RV64-NEXT: addiw a3, a3, -241 ; RV64-NEXT: slli a4, a3, 32 @@ -1704,37 +1671,24 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: vmul.vx v8, v8, a4, v0.t ; RV64-NEXT: li a5, 56 ; RV64-NEXT: vsrl.vx v8, v8, a5, v0.t -; RV64-NEXT: addi a6, sp, 16 -; RV64-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill ; RV64-NEXT: addi a6, a0, -16 ; RV64-NEXT: sltu a0, a0, a6 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a6 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a1, v0.t -; RV64-NEXT: vsub.vv v16, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v16, a2, v0.t +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vsub.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v24, v16, a2, v0.t ; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vmul.vx v8, v8, a4, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a5, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vmul.vx v16, v16, a4, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a5, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index f2926fa91e5c2..b1c25be7d3430 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1666,17 +1666,18 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 
0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -1692,124 +1693,67 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB34_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 1 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v16, v8, a2, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; 
RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -1818,87 +1762,53 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a0, a0, a3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v8, v24, a2, v0.t +; RV32-NEXT: vnot.v v24, v24, v0.t +; RV32-NEXT: vand.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: 
add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 @@ -1906,47 +1816,36 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; ; RV64-LABEL: vp_cttz_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: li a1, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a2, 
a0 -; RV64-NEXT: bltu a0, a1, .LBB34_2 -; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v7, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB34_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB34_2: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t +; RV64-NEXT: li a2, 1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsub.vx v24, v8, a2, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vsub.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v24, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a4, 61681 ; RV64-NEXT: addiw a4, a4, -241 ; RV64-NEXT: slli a5, a4, 32 @@ -1959,40 +1858,27 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t -; RV64-NEXT: addi a7, sp, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vmul.vx v8, v8, a5, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsub.vx v24, v16, a2, v0.t +; RV64-NEXT: vnot.v v16, v16, v0.t +; RV64-NEXT: vand.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vsub.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v24, v16, a3, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vadd.vv v16, v24, v16, 
v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vmul.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a6, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -3819,17 +3705,18 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v24, v0, 2 +; RV32-NEXT: vslidedown.vi v7, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -3845,124 +3732,67 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: li a3, 16 +; RV32-NEXT: li a2, 16 ; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: mv a2, a0 -; RV32-NEXT: bltu a0, a3, .LBB70_2 +; RV32-NEXT: mv a1, a0 +; RV32-NEXT: bltu a0, a2, .LBB70_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a2, 16 +; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB70_2: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 1 +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v16, v8, a2, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, 
a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: addi a3, sp, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -3971,87 +3801,53 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, 
a0, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a0, a0, a3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vx v8, v24, a2, v0.t +; RV32-NEXT: vnot.v v24, v24, v0.t +; RV32-NEXT: vand.vv v8, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 
56 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 @@ -4059,47 +3855,36 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; ; RV64-LABEL: vp_cttz_zero_undef_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: li a1, 16 -; RV64-NEXT: vslidedown.vi v24, v0, 2 -; RV64-NEXT: mv a2, a0 -; RV64-NEXT: bltu a0, a1, .LBB70_2 -; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 +; RV64-NEXT: vslidedown.vi v7, v0, 2 +; RV64-NEXT: mv a1, a0 +; RV64-NEXT: bltu a0, a2, .LBB70_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB70_2: -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t +; RV64-NEXT: li a2, 1 +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV64-NEXT: vsub.vx v24, v8, a2, v0.t ; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: lui a2, 349525 -; RV64-NEXT: addiw a2, a2, 1365 -; RV64-NEXT: slli a3, a2, 32 -; RV64-NEXT: add a2, a2, a3 -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t +; RV64-NEXT: vand.vv v8, v8, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: addiw a1, a1, 1365 +; RV64-NEXT: slli a3, a1, 32 +; RV64-NEXT: add a1, a1, a3 +; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vsub.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a3, 209715 ; RV64-NEXT: addiw a3, a3, 819 ; RV64-NEXT: slli a4, a3, 32 ; RV64-NEXT: add a3, a3, a4 -; RV64-NEXT: vand.vx v16, v8, a3, v0.t +; RV64-NEXT: vand.vx v24, v8, a3, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV64-NEXT: vand.vx v8, v8, a3, v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t +; RV64-NEXT: vadd.vv v8, v24, v8, v0.t +; RV64-NEXT: vsrl.vi v24, v8, 4, v0.t +; RV64-NEXT: vadd.vv v8, v8, v24, v0.t ; RV64-NEXT: lui a4, 61681 ; RV64-NEXT: addiw a4, a4, -241 ; RV64-NEXT: slli a5, a4, 32 @@ -4112,40 +3897,27 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: vmul.vx v8, v8, a5, v0.t ; RV64-NEXT: li a6, 56 ; RV64-NEXT: vsrl.vx v8, v8, a6, v0.t -; RV64-NEXT: addi a7, sp, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill ; RV64-NEXT: addi a7, a0, -16 ; RV64-NEXT: sltu a0, a0, a7 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: and a0, a0, a7 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsub.vx v16, v8, a1, v0.t -; RV64-NEXT: vnot.v v8, v8, v0.t -; RV64-NEXT: vand.vv v8, v8, v16, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV64-NEXT: vand.vx v16, v16, a2, v0.t -; RV64-NEXT: vsub.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v16, v8, a3, v0.t -; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV64-NEXT: vand.vx v8, v8, a3, 
v0.t -; RV64-NEXT: vadd.vv v8, v16, v8, v0.t -; RV64-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV64-NEXT: vadd.vv v8, v8, v16, v0.t -; RV64-NEXT: vand.vx v8, v8, a4, v0.t -; RV64-NEXT: vmul.vx v8, v8, a5, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a6, v0.t -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vsub.vx v24, v16, a2, v0.t +; RV64-NEXT: vnot.v v16, v16, v0.t +; RV64-NEXT: vand.vv v16, v16, v24, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV64-NEXT: vand.vx v24, v24, a1, v0.t +; RV64-NEXT: vsub.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v24, v16, a3, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vadd.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV64-NEXT: vadd.vv v16, v16, v24, v0.t +; RV64-NEXT: vand.vx v16, v16, a4, v0.t +; RV64-NEXT: vmul.vx v16, v16, a5, v0.t +; RV64-NEXT: vsrl.vx v16, v16, a6, v0.t ; RV64-NEXT: ret %v = call <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64> %va, i1 true, <32 x i1> %m, i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 8c8da6d1e0031..309d568322a8b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -45,9 +45,9 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RVF-NEXT: vnsrl.wi v9, v10, 0 ; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v9, v9, a1 ; RVF-NEXT: vmseq.vi v0, v8, 0 -; RVF-NEXT: vsub.vx v8, v9, a1 -; RVF-NEXT: vmerge.vim v8, v8, 8, v0 +; RVF-NEXT: vmerge.vim v8, v9, 8, v0 ; RVF-NEXT: vse8.v v8, (a0) ; RVF-NEXT: ret ; @@ -64,9 +64,9 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RVD-NEXT: vnsrl.wi v9, v10, 0 ; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v9, v9, a1 ; RVD-NEXT: vmseq.vi v0, v8, 0 -; RVD-NEXT: vsub.vx v8, v9, a1 -; RVD-NEXT: vmerge.vim v8, v8, 8, v0 +; RVD-NEXT: vmerge.vim v8, v9, 8, v0 ; RVD-NEXT: vse8.v v8, (a0) ; RVD-NEXT: ret ; @@ -444,9 +444,9 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RVF-NEXT: vnsrl.wi v10, v12, 0 ; RVF-NEXT: li a1, 127 +; RVF-NEXT: vsub.vx v10, v10, a1 ; RVF-NEXT: vmseq.vi v0, v8, 0 -; RVF-NEXT: vsub.vx v8, v10, a1 -; RVF-NEXT: vmerge.vim v8, v8, 8, v0 +; RVF-NEXT: vmerge.vim v8, v10, 8, v0 ; RVF-NEXT: vse8.v v8, (a0) ; RVF-NEXT: ret ; @@ -464,9 +464,9 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RVD-NEXT: vnsrl.wi v10, v12, 0 ; RVD-NEXT: li a1, 127 +; RVD-NEXT: vsub.vx v10, v10, a1 ; RVD-NEXT: vmseq.vi v0, v8, 0 -; RVD-NEXT: vsub.vx v8, v10, a1 -; RVD-NEXT: vmerge.vim v8, v8, 8, v0 +; RVD-NEXT: vmerge.vim v8, v10, 8, v0 ; RVD-NEXT: vse8.v v8, (a0) ; RVD-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index 88a7878e8532b..108bd85455526 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -10,9 +10,9 @@ define i1 @extractelt_v1i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.s.x v9, zero ; 
CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -332,13 +332,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: vle8.v v16, (a0) ; RV32-NEXT: mv a0, sp ; RV32-NEXT: add a1, a0, a1 -; RV32-NEXT: vmseq.vi v8, v8, 0 +; RV32-NEXT: vmseq.vi v24, v8, 0 ; RV32-NEXT: vmseq.vi v0, v16, 0 -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: vmerge.vim v24, v16, 1, v0 -; RV32-NEXT: vse8.v v24, (a0) -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v16, 1, v0 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmerge.vim v16, v8, 1, v0 +; RV32-NEXT: vse8.v v16, (a0) +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: lbu a0, 0(a1) @@ -363,13 +363,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: vle8.v v16, (a0) ; RV64-NEXT: mv a0, sp ; RV64-NEXT: add a1, a0, a1 -; RV64-NEXT: vmseq.vi v8, v8, 0 +; RV64-NEXT: vmseq.vi v24, v8, 0 ; RV64-NEXT: vmseq.vi v0, v16, 0 -; RV64-NEXT: vmv.v.i v16, 0 -; RV64-NEXT: vmerge.vim v24, v16, 1, v0 -; RV64-NEXT: vse8.v v24, (a0) -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v16, 1, v0 +; RV64-NEXT: vmv.v.i v8, 0 +; RV64-NEXT: vmerge.vim v16, v8, 1, v0 +; RV64-NEXT: vse8.v v16, (a0) +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: lbu a0, 0(a1) @@ -394,13 +394,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: vle8.v v16, (a0) ; RV32ZBS-NEXT: mv a0, sp ; RV32ZBS-NEXT: add a1, a0, a1 -; RV32ZBS-NEXT: vmseq.vi v8, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v24, v8, 0 ; RV32ZBS-NEXT: vmseq.vi v0, v16, 0 -; RV32ZBS-NEXT: vmv.v.i v16, 0 -; RV32ZBS-NEXT: vmerge.vim v24, v16, 1, v0 -; RV32ZBS-NEXT: vse8.v v24, (a0) -; RV32ZBS-NEXT: vmv1r.v v0, v8 -; RV32ZBS-NEXT: vmerge.vim v8, v16, 1, v0 +; RV32ZBS-NEXT: vmv.v.i v8, 0 +; RV32ZBS-NEXT: vmerge.vim v16, v8, 1, v0 +; RV32ZBS-NEXT: vse8.v v16, (a0) +; RV32ZBS-NEXT: vmv1r.v v0, v24 +; RV32ZBS-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32ZBS-NEXT: addi a0, sp, 128 ; RV32ZBS-NEXT: vse8.v v8, (a0) ; RV32ZBS-NEXT: lbu a0, 0(a1) @@ -425,13 +425,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: vle8.v v16, (a0) ; RV64ZBS-NEXT: mv a0, sp ; RV64ZBS-NEXT: add a1, a0, a1 -; RV64ZBS-NEXT: vmseq.vi v8, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v24, v8, 0 ; RV64ZBS-NEXT: vmseq.vi v0, v16, 0 -; RV64ZBS-NEXT: vmv.v.i v16, 0 -; RV64ZBS-NEXT: vmerge.vim v24, v16, 1, v0 -; RV64ZBS-NEXT: vse8.v v24, (a0) -; RV64ZBS-NEXT: vmv1r.v v0, v8 -; RV64ZBS-NEXT: vmerge.vim v8, v16, 1, v0 +; RV64ZBS-NEXT: vmv.v.i v8, 0 +; RV64ZBS-NEXT: vmerge.vim v16, v8, 1, v0 +; RV64ZBS-NEXT: vse8.v v16, (a0) +; RV64ZBS-NEXT: vmv1r.v v0, v24 +; RV64ZBS-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64ZBS-NEXT: addi a0, sp, 128 ; RV64ZBS-NEXT: vse8.v v8, (a0) ; RV64ZBS-NEXT: lbu a0, 0(a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll index 1e93a73ede5d6..ea6cdab0bd320 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -8,9 +8,9 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: ceil_v1f16: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -30,9 +30,9 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: ceil_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -52,9 +52,9 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: ceil_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,9 +74,9 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: ceil_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -96,9 +96,9 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: ceil_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -119,9 +119,9 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -251,9 +251,9 @@ define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: ceil_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -273,9 +273,9 @@ define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: ceil_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -295,9 +295,9 @@ define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: ceil_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: 
vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -317,9 +317,9 @@ define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: ceil_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll index 53018939fc6eb..4eca68e8e007b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ -8,9 +8,9 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: floor_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -30,9 +30,9 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: floor_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -52,9 +52,9 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: floor_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -74,9 +74,9 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: floor_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -96,9 +96,9 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: floor_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -119,9 +119,9 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -251,9 +251,9 @@ define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: floor_v1f64: ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -273,9 +273,9 @@ define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: floor_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -295,9 +295,9 @@ define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: floor_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -317,9 +317,9 @@ define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: floor_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index bd5833aa082c8..25b9805d20cda 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -719,20 +719,9 @@ declare <32 x double> @llvm.vp.floor.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -741,46 +730,32 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.floor.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index ed60aeca9f4d7..8d378faa8ac34 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -13,32 +13,33 @@ declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, 
v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: @@ -66,12 +66,11 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; 
ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: @@ -138,12 +137,11 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,33 +155,34 @@ declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v14, v14, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v12, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v12, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -196,11 +195,10 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; 
ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: @@ -212,9 +210,8 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v2, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v2 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -229,33 +226,34 @@ declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 ; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v0, v20, v20, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v16, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -268,9 +266,8 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v2, v10, v10 ; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v2 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret @@ -284,9 +281,8 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v4 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma @@ -301,15 +297,15 @@ 
declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -320,11 +316,10 @@ define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -335,15 +330,15 @@ declare <4 x float> @llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -354,11 +349,10 @@ define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -369,15 +363,15 @@ declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, 
v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -388,9 +382,8 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -403,15 +396,15 @@ declare <16 x float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -422,9 +415,8 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -437,15 +429,15 @@ declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -456,11 +448,10 @@ define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm 
v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -471,15 +462,15 @@ declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -490,9 +481,8 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -505,15 +495,15 @@ declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -524,9 +514,8 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -539,28 +528,15 @@ declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16 define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -569,25 +545,12 @@ define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 define <16 x double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v @@ -601,16 +564,15 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -618,11 +580,6 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: addi a0, 
sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -632,14 +589,10 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -656,7 +609,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -670,8 +623,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -682,12 +634,13 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -701,24 +654,21 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 4 -; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB25_2 @@ -727,30 +677,23 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, 
e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -759,32 +702,25 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index c92770ded42b7..ffc09b866284a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -15,11 +15,10 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv: @@ -29,11 +28,10 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, 
mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -48,11 +46,10 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v4f16_vv: @@ -62,11 +59,10 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -81,11 +77,10 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v8f16_vv: @@ -95,9 +90,8 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v2 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -114,9 +108,8 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v2, v10, v10 ; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v2 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret @@ -128,9 +121,8 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v4 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; 
ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -147,11 +139,10 @@ define <2 x float> @vfmax_v2f32_vv(<2 x float> %a, <2 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.maximum.v2f32(<2 x float> %a, <2 x float> %b) ret <2 x float> %v @@ -164,11 +155,10 @@ define <4 x float> @vfmax_v4f32_vv(<4 x float> %a, <4 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.maximum.v4f32(<4 x float> %a, <4 x float> %b) ret <4 x float> %v @@ -181,9 +171,8 @@ define <8 x float> @vfmax_v8f32_vv(<8 x float> %a, <8 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -198,9 +187,8 @@ define <16 x float> @vfmax_v16f32_vv(<16 x float> %a, <16 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -215,11 +203,10 @@ define <2 x double> @vfmax_v2f64_vv(<2 x double> %a, <2 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.maximum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %v @@ -232,9 +219,8 @@ define <4 x double> @vfmax_v4f64_vv(<4 x double> %a, <4 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -249,9 +235,8 @@ define <8 x double> @vfmax_v8f64_vv(<8 x double> %a, <8 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: 
vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -264,23 +249,12 @@ declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>) define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind { ; CHECK-LABEL: vfmax_v16f64_vv: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.maximum.v16f64(<16 x double> %a, <16 x double> %b) ret <16 x double> %v @@ -312,13 +286,12 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnana: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v8, v8 +; ZVFH-NEXT: vfadd.vv v8, v8, v8 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vmfeq.vv v8, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v8 -; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFH-NEXT: vfmax.vv v8, v11, v8 +; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 +; ZVFH-NEXT: vfmax.vv v8, v10, v8 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnana: @@ -329,16 +302,15 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9 -; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 +; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -353,13 +325,12 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnanb: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v9, v9 +; ZVFH-NEXT: vfadd.vv v9, v9, v9 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v9, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v9 -; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnanb: @@ -374,12 +345,11 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 
x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index c201c8842e082..3831261792242 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -13,32 +13,33 @@ declare <2 x half> @llvm.vp.minimum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: @@ -66,12 +66,11 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, 
v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <4 x half> @llvm.vp.minimum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: @@ -138,12 +137,11 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: 
vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,33 +155,34 @@ declare <8 x half> @llvm.vp.minimum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v14, v14, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v12, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v12, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -196,11 +195,10 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: @@ -212,9 +210,8 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v2, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v2 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma @@ -229,33 +226,34 @@ declare <16 x half> @llvm.vp.minimum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; 
ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 ; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v0, v20, v20, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v16, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -268,9 +266,8 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v2, v10, v10 ; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v2 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret @@ -284,9 +281,8 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v4 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma @@ -301,15 +297,15 @@ declare <2 x float> @llvm.vp.minimum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmin_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ 
-320,11 +316,10 @@ define <2 x float> @vfmin_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -335,15 +330,15 @@ declare <4 x float> @llvm.vp.minimum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -354,11 +349,10 @@ define <4 x float> @vfmin_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -369,15 +363,15 @@ declare <8 x float> @llvm.vp.minimum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -388,9 +382,8 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, 
v8, v12 ; CHECK-NEXT: ret @@ -403,15 +396,15 @@ declare <16 x float> @llvm.vp.minimum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmin_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -422,9 +415,8 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -437,15 +429,15 @@ declare <2 x double> @llvm.vp.minimum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmin_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -456,11 +448,10 @@ define <2 x double> @vfmin_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -471,15 +462,15 @@ declare <4 x double> @llvm.vp.minimum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmin_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: 
vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -490,9 +481,8 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -505,15 +495,15 @@ declare <8 x double> @llvm.vp.minimum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmin_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -524,9 +514,8 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -539,28 +528,15 @@ declare <16 x double> @llvm.vp.minimum.v16f64(<16 x double>, <16 x double>, <16 define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -569,25 +545,12 @@ 
define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %vb, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v @@ -601,16 +564,15 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -618,11 +580,6 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -632,14 +589,10 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 @@ -656,7 +609,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vl8r.v v24, (a0) # 
Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -670,8 +623,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -682,12 +634,13 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -701,24 +654,21 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 4 -; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB25_2 @@ -727,30 +677,23 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size 
Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -759,32 +702,25 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index bed3a456f68ff..6f34e487ee05d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -15,11 +15,10 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv: @@ -29,11 +28,10 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -48,11 +46,10 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 
; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v4f16_vv: @@ -62,11 +59,10 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 -; ZVFHMIN-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v11 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -81,11 +77,10 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v8f16_vv: @@ -95,9 +90,8 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v2 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -114,9 +108,8 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v2, v10, v10 ; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v2 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret @@ -128,9 +121,8 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v4 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -147,11 +139,10 @@ define <2 x float> @vfmin_v2f32_vv(<2 x float> %a, <2 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.minimum.v2f32(<2 x float> %a, <2 x float> %b) ret <2 x float> %v @@ -164,11 +155,10 @@ define <4 x float> @vfmin_v4f32_vv(<4 x float> %a, <4 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; 
CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.minimum.v4f32(<4 x float> %a, <4 x float> %b) ret <4 x float> %v @@ -181,9 +171,8 @@ define <8 x float> @vfmin_v8f32_vv(<8 x float> %a, <8 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -198,9 +187,8 @@ define <16 x float> @vfmin_v16f32_vv(<16 x float> %a, <16 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -215,11 +203,10 @@ define <2 x double> @vfmin_v2f64_vv(<2 x double> %a, <2 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.minimum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %v @@ -232,9 +219,8 @@ define <4 x double> @vfmin_v4f64_vv(<4 x double> %a, <4 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -249,9 +235,8 @@ define <8 x double> @vfmin_v8f64_vv(<8 x double> %a, <8 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -264,23 +249,12 @@ declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>) define <16 x double> @vfmin_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind { ; CHECK-LABEL: vfmin_v16f64_vv: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 
-; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.minimum.v16f64(<16 x double> %a, <16 x double> %b) ret <16 x double> %v @@ -312,13 +286,12 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnana: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v8, v8 +; ZVFH-NEXT: vfadd.vv v8, v8, v8 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 -; ZVFH-NEXT: vmfeq.vv v8, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v9, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v8 -; ZVFH-NEXT: vmerge.vvm v8, v10, v9, v0 -; ZVFH-NEXT: vfmin.vv v8, v11, v8 +; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 +; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 +; ZVFH-NEXT: vfmin.vv v8, v10, v8 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnana: @@ -329,16 +302,15 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vfadd.vv v8, v10, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v9, v9 -; ZVFHMIN-NEXT: vmerge.vvm v10, v11, v9, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v11, v0 +; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -353,13 +325,12 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnanb: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vfadd.vv v10, v9, v9 +; ZVFH-NEXT: vfadd.vv v9, v9, v9 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v9, v10, v10 -; ZVFH-NEXT: vmerge.vvm v11, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v9 -; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 +; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnanb: @@ -374,12 +345,11 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v9, v0 +; ZVFHMIN-NEXT: vmerge.vvm v10, v9, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v9, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 
1b50214bbf164..d0d1bf4241c76 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -10,9 +10,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -33,9 +33,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -56,9 +56,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -79,9 +79,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-LABEL: nearbyint_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -103,9 +103,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -218,9 +218,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -241,9 +241,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -264,9 +264,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-LABEL: nearbyint_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; 
CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 8e214e4054783..9e83efd351953 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1407,8 +1407,8 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x float> poison, float %e0, i64 0 @@ -1458,8 +1458,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d ; CHECK-NEXT: vfmv.v.f v8, fa4 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index 6bfd0ac932672..3b85328bf921a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -57,8 +57,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -68,8 +68,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -244,16 +244,17 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: slli a0, a0, 3 ; V128-NEXT: sub sp, sp, a0 ; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; V128-NEXT: vmv8r.v v0, v16 -; V128-NEXT: addi a0, sp, 16 -; V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; V128-NEXT: vmv8r.v v24, v16 ; V128-NEXT: vmv8r.v v16, v8 ; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; V128-NEXT: vslidedown.vi v8, v0, 16 +; V128-NEXT: vmv8r.v v8, v24 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: vslidedown.vi v0, v24, 16 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; V128-NEXT: vwaddu.vv v24, v0, v8 +; V128-NEXT: vwaddu.vv v24, v8, v0 ; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v24, a0, v8 +; V128-NEXT: vwmaccu.vx v24, a0, v0 ; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vslidedown.vi v0, v16, 16 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -261,9 +262,9 @@ define <64 x float> 
@interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll index 85b849045e8ce..a8e4af2d7368e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -395,8 +395,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmin.d fa5, fa5, fa4 ; RV32-NEXT: fcvt.w.d a2, fa5, rtz ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -496,8 +496,8 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmin.d fa5, fa5, fa4 ; RV64-NEXT: fcvt.l.d a2, fa5, rtz ; RV64-NEXT: and a0, a0, a2 -; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 @@ -580,8 +580,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV32-NEXT: fmax.d fa4, fa4, fa3 ; RV32-NEXT: fmin.d fa5, fa4, fa5 ; RV32-NEXT: fcvt.wu.d a0, fa5, rtz -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV32-NEXT: vse8.v v9, (a1) ; RV32-NEXT: addi sp, s0, -128 @@ -656,8 +656,8 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) { ; RV64-NEXT: fmax.d fa4, fa4, fa3 ; RV64-NEXT: fmin.d fa5, fa4, fa5 ; RV64-NEXT: fcvt.lu.d a0, fa5, rtz -; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslidedown.vi v9, v8, 4, v0.t ; RV64-NEXT: vse8.v v9, (a1) ; RV64-NEXT: addi sp, s0, -128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll index f189354237ee3..cb7ff2241f5aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-costrained-sdnode.ll @@ -10,9 +10,9 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: round_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -32,9 +32,9 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: round_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -54,9 +54,9 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: round_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, 
ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,9 +76,9 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: round_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -98,9 +98,9 @@ define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: round_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -121,9 +121,9 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -253,9 +253,9 @@ define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: round_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -275,9 +275,9 @@ define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -297,9 +297,9 @@ define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: round_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -319,9 +319,9 @@ define <8 x double> @round_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: round_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll index 11920c7c31c98..f5549adb740cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -10,9 +10,9 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -32,9 +32,9 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -54,9 +54,9 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,9 +76,9 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -98,9 +98,9 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: roundeven_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -121,9 +121,9 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -253,9 +253,9 @@ define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -275,9 +275,9 @@ define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -297,9 +297,9 @@ define 
<4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -319,9 +319,9 @@ define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: roundeven_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll index a2ff77625b758..d2cf4c11ac1a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -680,14 +680,15 @@ define <16 x i64> @fshr_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v24, a0, v0.t -; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t -; CHECK-NEXT: vnot.v v8, v24, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v24, v0.t +; CHECK-NEXT: vnot.v v24, v8, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vi v24, v24, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v24, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -705,27 +706,38 @@ define <16 x i64> @fshl_v16i64(<16 x i64> %a, <16 x i64> %b, <16 x i64> %c, <16 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v24, a0, v0.t -; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t -; CHECK-NEXT: vnot.v v16, v24, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: vsll.vv v16, v16, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vnot.v v24, v8, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v24, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v24, v24, 1, v0.t -; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll index f16581444afca..2481dd4054c25 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ftrunc-constrained-sdnode.ll @@ -8,9 +8,9 @@ define <1 x half> @trunc_v1f16(<1 x half> %x) strictfp { ; CHECK-LABEL: trunc_v1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -28,9 +28,9 @@ define <2 x half> @trunc_v2f16(<2 x half> %x) strictfp { ; CHECK-LABEL: trunc_v2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -48,9 +48,9 @@ define <4 x half> @trunc_v4f16(<4 x half> %x) strictfp { ; CHECK-LABEL: trunc_v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -68,9 +68,9 @@ define <8 x half> @trunc_v8f16(<8 x half> %x) strictfp { ; CHECK-LABEL: trunc_v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -88,9 +88,9 @@ define <16 x half> @trunc_v16f16(<16 x half> %x) strictfp { ; CHECK-LABEL: trunc_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -109,9 +109,9 @@ define <32 x half> @trunc_v32f16(<32 x half> %x) strictfp { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; 
CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -229,9 +229,9 @@ define <1 x double> @trunc_v1f64(<1 x double> %x) strictfp { ; CHECK-LABEL: trunc_v1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -249,9 +249,9 @@ define <2 x double> @trunc_v2f64(<2 x double> %x) strictfp { ; CHECK-LABEL: trunc_v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -269,9 +269,9 @@ define <4 x double> @trunc_v4f64(<4 x double> %x) strictfp { ; CHECK-LABEL: trunc_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -289,9 +289,9 @@ define <8 x double> @trunc_v8f64(<8 x double> %x) strictfp { ; CHECK-LABEL: trunc_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index ab6df1d3e883f..7f03bab962b53 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -391,18 +391,18 @@ define void @insert_v8i1_v4i1_0(ptr %vp, ptr %svp) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vlm.v v8, (a1) +; CHECK-NEXT: vlm.v v10, (a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vmv.v.v v9, v8 +; CHECK-NEXT: vmv.v.v v8, v9 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v9, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vsm.v v8, (a0) ; CHECK-NEXT: ret %v = load <8 x i1>, ptr %vp @@ -418,17 +418,17 @@ define void @insert_v8i1_v4i1_4(ptr %vp, ptr %svp) { ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vlm.v v8, (a1) +; CHECK-NEXT: vlm.v v10, (a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim 
v9, v9, 1, v0 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 4 -; CHECK-NEXT: vmsne.vi v8, v9, 0 +; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vsm.v v8, (a0) ; CHECK-NEXT: ret %v = load <8 x i1>, ptr %vp @@ -468,32 +468,32 @@ define @insert_nxv2i1_v4i1_0( %v, ptr %svp) { ; VLA-LABEL: insert_nxv2i1_v4i1_0: ; VLA: # %bb.0: ; VLA-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; VLA-NEXT: vlm.v v8, (a0) +; VLA-NEXT: vlm.v v10, (a0) ; VLA-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; VLA-NEXT: vmv.v.i v8, 0 +; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 +; VLA-NEXT: vmv1r.v v0, v10 ; VLA-NEXT: vmerge.vim v9, v9, 1, v0 -; VLA-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; VLA-NEXT: vmv.v.i v10, 0 -; VLA-NEXT: vmv1r.v v0, v8 -; VLA-NEXT: vmerge.vim v8, v10, 1, v0 ; VLA-NEXT: vsetvli zero, zero, e8, mf4, tu, ma -; VLA-NEXT: vmv.v.v v9, v8 +; VLA-NEXT: vmv.v.v v8, v9 ; VLA-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; VLA-NEXT: vmsne.vi v0, v9, 0 +; VLA-NEXT: vmsne.vi v0, v8, 0 ; VLA-NEXT: ret ; ; VLS-LABEL: insert_nxv2i1_v4i1_0: ; VLS: # %bb.0: ; VLS-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; VLS-NEXT: vlm.v v8, (a0) -; VLS-NEXT: vmv.v.i v9, 0 -; VLS-NEXT: vmerge.vim v10, v9, 1, v0 -; VLS-NEXT: vmv1r.v v0, v8 -; VLS-NEXT: vmerge.vim v8, v9, 1, v0 +; VLS-NEXT: vlm.v v10, (a0) +; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vmerge.vim v9, v8, 1, v0 +; VLS-NEXT: vmv1r.v v0, v10 +; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetvli zero, zero, e8, mf4, tu, ma -; VLS-NEXT: vmv.v.v v10, v8 +; VLS-NEXT: vmv.v.v v9, v8 ; VLS-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VLS-NEXT: vmsne.vi v0, v10, 0 +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: ret %sv = load <4 x i1>, ptr %svp %c = call @llvm.vector.insert.v4i1.nxv2i1( %v, <4 x i1> %sv, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 6da83644413bc..a67ba6b362848 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -70,8 +70,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -81,8 +81,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -195,8 +195,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V128-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; V128-NEXT: vid.v v8 ; V128-NEXT: vsrl.vi v8, v8, 1 -; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vadd.vi v8, v8, 1 +; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vrgather.vv v10, v9, v8, v0.t ; V128-NEXT: vmv.v.v v8, v10 ; V128-NEXT: ret @@ 
-210,8 +210,8 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) { ; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, mu ; V512-NEXT: vid.v v8 ; V512-NEXT: vsrl.vi v8, v8, 1 -; V512-NEXT: vmv.v.i v0, 10 ; V512-NEXT: vadd.vi v8, v8, 1 +; V512-NEXT: vmv.v.i v0, 10 ; V512-NEXT: vrgather.vv v10, v9, v8, v0.t ; V512-NEXT: vmv1r.v v8, v10 ; V512-NEXT: ret @@ -409,16 +409,17 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: slli a0, a0, 3 ; V128-NEXT: sub sp, sp, a0 ; V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; V128-NEXT: vmv8r.v v0, v16 -; V128-NEXT: addi a0, sp, 16 -; V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; V128-NEXT: vmv8r.v v24, v16 ; V128-NEXT: vmv8r.v v16, v8 ; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; V128-NEXT: vslidedown.vi v8, v0, 16 +; V128-NEXT: vmv8r.v v8, v24 +; V128-NEXT: addi a0, sp, 16 +; V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; V128-NEXT: vslidedown.vi v0, v24, 16 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; V128-NEXT: vwaddu.vv v24, v0, v8 +; V128-NEXT: vwaddu.vv v24, v8, v0 ; V128-NEXT: li a0, -1 -; V128-NEXT: vwmaccu.vx v24, a0, v8 +; V128-NEXT: vwmaccu.vx v24, a0, v0 ; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; V128-NEXT: vslidedown.vi v0, v16, 16 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma @@ -426,9 +427,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 0e8d9cf030669..33922193492e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -89,8 +89,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -162,16 +162,16 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v16, 2 -; RV32-NEXT: li a0, 5 -; RV32-NEXT: vslide1down.vx v20, v16, a0 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vle16.v v21, (a0) +; RV32-NEXT: vle16.v v20, (a0) +; RV32-NEXT: li a0, 5 +; RV32-NEXT: vslide1down.vx v21, v16, a0 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v8, v20 ; RV32-NEXT: li a0, 164 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v16, v8, v21 -; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t +; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -213,9 +213,9 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: addi a0, a0, 
%lo(.LCPI12_1) ; RV32-NEXT: vle16.v v17, (a0) +; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret @@ -368,9 +368,9 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -387,9 +387,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -422,9 +422,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -441,9 +441,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -464,9 +464,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: li a0, 98 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -667,9 +667,9 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: li a0, 195 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -681,9 +681,9 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -698,9 +698,9 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) ; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: 
vslidedown.vi v8, v8, 2 ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 03e99baf91c08..8a527a4f24e75 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -3315,40 +3315,40 @@ define void @mulhu_v16i16(ptr %x) { ; RV32-LABEL: mulhu_v16i16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v10, (a0) +; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a1, 257 ; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: lui a1, 1048568 -; RV32-NEXT: vmerge.vxm v12, v8, a1, v0 +; RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; RV32-NEXT: lui a1, 4 ; RV32-NEXT: addi a1, a1, 64 -; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: vmv.v.i v12, 0 +; RV32-NEXT: vmerge.vim v12, v12, 1, v0 +; RV32-NEXT: vmv1r.v v18, v0 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: lui a1, %hi(.LCPI182_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) ; RV32-NEXT: vle16.v v14, (a1) -; RV32-NEXT: vsext.vf2 v16, v9 -; RV32-NEXT: vsrl.vv v16, v10, v16 -; RV32-NEXT: vmulhu.vv v14, v16, v14 -; RV32-NEXT: vsub.vv v10, v10, v14 -; RV32-NEXT: vmulhu.vv v10, v10, v12 -; RV32-NEXT: vadd.vv v10, v10, v14 +; RV32-NEXT: vsext.vf2 v16, v12 +; RV32-NEXT: vsrl.vv v12, v8, v16 +; RV32-NEXT: vmulhu.vv v12, v12, v14 +; RV32-NEXT: vsub.vv v8, v8, v12 +; RV32-NEXT: vmulhu.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v12 ; RV32-NEXT: lui a1, 2 ; RV32-NEXT: addi a1, a1, 289 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 3 -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v9, 1, v0 +; RV32-NEXT: vmv.v.i v10, 3 +; RV32-NEXT: vmerge.vim v10, v10, 2, v0 +; RV32-NEXT: vmv1r.v v0, v18 +; RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-NEXT: vsext.vf2 v12, v8 -; RV32-NEXT: vsrl.vv v8, v10, v12 +; RV32-NEXT: vsext.vf2 v12, v10 +; RV32-NEXT: vsrl.vv v8, v8, v12 ; RV32-NEXT: vse16.v v8, (a0) ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index f98cb343a2ab4..2781a153b2b40 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -176,27 +176,22 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vslideup.vi v8, v16, 4 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: li a5, 12 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: lui a4, 12 -; RV32-NEXT: vmv.s.x v3, a4 +; RV32-NEXT: vmv.s.x v0, a4 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 24 +; RV32-NEXT: li a5, 40 ; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; 
RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vmv1r.v v0, v3 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 2 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs1r.v v3, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vslideup.vi v8, v16, 10, v0.t ; RV32-NEXT: csrr a4, vlenb ; RV32-NEXT: li a5, 20 @@ -204,12 +199,18 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv1r.v v3, v0 ; RV32-NEXT: lui a4, %hi(.LCPI6_0) ; RV32-NEXT: addi a4, a4, %lo(.LCPI6_0) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: vle16.v v8, (a4) ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: slli a4, a4, 2 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill @@ -221,14 +222,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vs4r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vle32.v v24, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -236,13 +237,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a5, -64 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload @@ -271,60 +271,58 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v8, v16, 2 -; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v8, v16, 8, v0.t -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vslideup.vi v8, v24, 8, v0.t +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: lui a1, %hi(.LCPI6_2) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_2) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_3) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_3) -; RV32-NEXT: vle16.v v24, (a1) +; RV32-NEXT: vle16.v v20, (a1) ; RV32-NEXT: vle16.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: 
vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v24 +; RV32-NEXT: vrgatherei16.vv v8, v0, v20 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t +; RV32-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v20, v0.t ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_4) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu @@ -333,57 +331,53 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v12, v24, v8 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v12, v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v3, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 40 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vslideup.vi v12, v16, 6, v0.t -; RV32-NEXT: vmv.v.v v4, v12 +; RV32-NEXT: vmv.v.v v16, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_5) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_5) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_6) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_6) -; RV32-NEXT: vle16.v v24, (a1) -; RV32-NEXT: vle16.v v8, (a3) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vle16.v v20, (a1) +; RV32-NEXT: vle16.v v4, (a3) ; RV32-NEXT: li a1, 960 -; RV32-NEXT: vmv.s.x v2, a1 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: 
vrgatherei16.vv v8, v16, v24 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v20 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v8, v24, v4, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v4, v8 +; RV32-NEXT: vmv.v.v v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_7) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu @@ -392,177 +386,189 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v28, v24, v8 -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v12, v16, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v28, v8, 4, v0.t -; RV32-NEXT: vmv.v.v v4, v28 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v12, v16, 4, v0.t +; RV32-NEXT: vmv.v.v v4, v12 ; RV32-NEXT: lui a1, %hi(.LCPI6_8) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_8) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_9) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_9) -; RV32-NEXT: vle16.v v28, (a1) -; RV32-NEXT: vle16.v v24, (a3) +; RV32-NEXT: vle16.v v0, (a1) +; RV32-NEXT: vle16.v v8, (a3) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v28 -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v0 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; 
RV32-NEXT: vrgatherei16.vv v8, v24, v16, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v4, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; RV32-NEXT: lui a1, %hi(.LCPI6_10) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_10) -; RV32-NEXT: vle16.v v4, (a1) -; RV32-NEXT: lui a1, 15 -; RV32-NEXT: vmv.s.x v6, a1 +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vslideup.vi v28, v24, 6 -; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vslideup.vi v24, v16, 6 +; RV32-NEXT: lui a1, 15 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v28, v8, v4, v0.t +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v12, v0.t +; RV32-NEXT: vmv1r.v v3, v0 ; RV32-NEXT: lui a1, %hi(.LCPI6_11) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_11) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a3, %hi(.LCPI6_12) ; RV32-NEXT: addi a3, a3, %lo(.LCPI6_12) -; RV32-NEXT: vle16.v v0, (a1) -; RV32-NEXT: vle16.v v24, (a3) +; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: vle16.v v28, (a3) ; RV32-NEXT: li a1, 1008 -; RV32-NEXT: vmv.s.x v7, a1 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 48 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v0 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v16, v8, v4 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 48 +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v16, v8, v28, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v28, v8 +; RV32-NEXT: vmv.v.v v24, v16 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI6_13) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_13) ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; RV32-NEXT: vle16.v v8, (a1) -; RV32-NEXT: vmv1r.v v0, v6 +; RV32-NEXT: vle16.v v12, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a3, 12 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 24 
+; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v16, v8, v0.t +; RV32-NEXT: vrgatherei16.vv v8, v16, v12, v0.t ; RV32-NEXT: lui a1, %hi(.LCPI6_14) ; RV32-NEXT: addi a1, a1, %lo(.LCPI6_14) ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, mu ; RV32-NEXT: lui a2, %hi(.LCPI6_15) ; RV32-NEXT: addi a2, a2, %lo(.LCPI6_15) -; RV32-NEXT: vle16.v v16, (a1) -; RV32-NEXT: vle16.v v8, (a2) +; RV32-NEXT: vle16.v v12, (a1) +; RV32-NEXT: vle16.v v16, (a2) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v0, v16 +; RV32-NEXT: vrgatherei16.vv v16, v0, v12 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 +; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v4, v0.t +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v16, v24, v12, v0.t ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v24, v8 +; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v24, (a1) +; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 256 -; RV32-NEXT: vse32.v v28, (a1) +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 192 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 2 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload @@ -586,236 +592,289 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 
52 +; RV64-NEXT: li a3, 54 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x34, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 52 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x36, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 54 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: addi a2, a1, 256 -; RV64-NEXT: vle64.v v16, (a2) +; RV64-NEXT: vle64.v v24, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 27 +; RV64-NEXT: li a3, 30 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 35 +; RV64-NEXT: li a3, 46 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 38 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v8, v16, 4 +; RV64-NEXT: vrgather.vi v8, v24, 4 ; RV64-NEXT: li a1, 128 -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 8 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vmv1r.v v28, v0 +; RV64-NEXT: vmv.s.x v12, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t +; RV64-NEXT: vs1r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v24, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 21 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t +; RV64-NEXT: vmv.v.v v24, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; RV64-NEXT: li a1, 6 ; RV64-NEXT: vid.v v8 -; RV64-NEXT: vmul.vx v2, v8, a1 +; RV64-NEXT: vmul.vx v28, v8, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vrgatherei16.vv v8, v16, v28 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: li a1, 56 +; RV64-NEXT: vmv.s.x v16, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 13 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v2 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: li a1, 56 -; RV64-NEXT: vmv.s.x v1, a1 -; RV64-NEXT: vadd.vi v30, v2, -16 +; RV64-NEXT: 
vs1r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v30, v28, -16 +; RV64-NEXT: vmv.v.v v6, v28 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 46 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgatherei16.vv v8, v16, v30, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vmv.v.v v24, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 +; RV64-NEXT: li a2, 30 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v4, v16, 5 -; RV64-NEXT: vmv1r.v v0, v28 -; RV64-NEXT: vrgather.vi v4, v24, 3, v0.t -; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs2r.v v2, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v16, v2, 1 -; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 5 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 21 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16 -; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v16, v2, -15 +; RV64-NEXT: vrgather.vi v8, v24, 3, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v8, v6, 1 +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 11 +; RV64-NEXT: li a2, 38 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v24, v16, v8 +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64-NEXT: vadd.vi v18, v6, -15 +; RV64-NEXT: vmv2r.v v16, v6 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 46 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 11 +; 
RV64-NEXT: li a2, 13 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v2, v0.t +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v24, v8, v18, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 11 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl2r.v v2, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v6, v2, 2 +; RV64-NEXT: vmv2r.v v30, v6 +; RV64-NEXT: vadd.vi v20, v6, 2 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v8, v24, v6 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 38 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v0, v20 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: li a1, 24 -; RV64-NEXT: vmv.s.x v7, a1 -; RV64-NEXT: vadd.vi v26, v2, -14 +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v20, v30, -14 +; RV64-NEXT: vmv2r.v v22, v30 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: vrgatherei16.vv v8, v16, v26, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 46 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v24, v20, v0.t ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.i v12, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 +; RV64-NEXT: li a2, 30 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgatherei16.vv v16, v24, v12 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 21 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v6, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 29 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vi v16, v24, 4, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: li 
a2, 13 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v2, 3 +; RV64-NEXT: vadd.vi v20, v22, 3 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 38 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v0, v20 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v28, v2, -13 +; RV64-NEXT: vadd.vi v20, v22, -13 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 46 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v28, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v24, v20, v0.t ; RV64-NEXT: lui a1, 16 ; RV64-NEXT: addi a1, a1, 7 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 +; RV64-NEXT: li a2, 30 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12 -; RV64-NEXT: vmv1r.v v0, v6 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv4r.v v16, v0 +; RV64-NEXT: vrgatherei16.vv v4, v0, v12 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 21 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v24, 5, v0.t +; RV64-NEXT: vrgather.vi v4, v24, 5, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v16, v8 +; RV64-NEXT: vmv.v.v v4, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: slli a2, a1, 2 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 96 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.x v8, a1 @@ -823,98 +882,115 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: li a1, 192 ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, 
a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v4, v16, 2 -; RV64-NEXT: vrgatherei16.vv v4, v24, v8, v0.t +; RV64-NEXT: vrgather.vi v12, v16, 2 +; RV64-NEXT: vrgatherei16.vv v12, v24, v8, v0.t +; RV64-NEXT: vmv.v.v v16, v12 ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v16, v2, 4 +; RV64-NEXT: vadd.vi v20, v22, 4 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 38 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v0, v20 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: li a1, 28 -; RV64-NEXT: vmv.s.x v1, a1 -; RV64-NEXT: vadd.vi v16, v2, -12 +; RV64-NEXT: vmv.s.x v20, a1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v20, v22, -12 +; RV64-NEXT: vmv2r.v v6, v22 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 46 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v24, v16, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v8, v24, v20, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vmv.v.v v16, v8 +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 112 ; RV64-NEXT: addi a1, a1, 1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v12, a1 +; RV64-NEXT: vmv.v.x v8, a1 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 27 +; RV64-NEXT: li a2, 30 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v16, 3 +; RV64-NEXT: vrgather.vi v12, v16, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 19 +; RV64-NEXT: li a2, 21 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; RV64-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v2, 5 +; RV64-NEXT: vmv2r.v v10, v6 +; RV64-NEXT: vadd.vi v8, v6, 5 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 43 +; RV64-NEXT: li a2, 38 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded 
Reload +; RV64-NEXT: vrgatherei16.vv v16, v0, v8 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-NEXT: vadd.vi v12, v2, -11 +; RV64-NEXT: vadd.vi v8, v10, -11 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 35 +; RV64-NEXT: li a2, 46 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v16, v24, v12, v0.t +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 256 -; RV64-NEXT: vse64.v v4, (a1) +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: slli a3, a2, 2 ; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -922,29 +998,29 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 3 -; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 11 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 52 +; RV64-NEXT: li a1, 54 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index c295fed2c28c1..8986c876904b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -141,8 +141,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -151,8 +151,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1: ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmv.v.x v8, a1 +; 
ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -246,9 +246,9 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: li a2, 19 -; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -257,9 +257,9 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1: ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: li a2, 19 -; ZVE32F-NEXT: vmv.s.x v0, a2 ; ZVE32F-NEXT: vmv.v.x v8, a1 +; ZVE32F-NEXT: li a1, 19 +; ZVE32F-NEXT: vmv.s.x v0, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -286,8 +286,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -303,8 +303,8 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -331,8 +331,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -348,8 +348,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -375,8 +375,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -391,8 +391,8 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; 
ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index bb125ba773b89..63c6dae57442d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -25,10 +25,10 @@ define void @splat_v1i1(ptr %x, i1 %y) { ; CHECK-LABEL: splat_v1i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: andi a1, a1, 1 -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 9fbc22221f99b..be9b5a75a5f49 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -12631,8 +12631,8 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV32: # %bb.0: ; RV32-NEXT: li a1, -512 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vmerge.vim v8, v8, 0, v0 ; RV32-NEXT: vluxei32.v v8, (a0), v8 ; RV32-NEXT: ret @@ -12728,8 +12728,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: vmv.v.x v8, a3 ; RV32-NEXT: vslide1down.vx v8, v8, a5 ; RV32-NEXT: vslide1down.vx v8, v8, a6 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: ret ; @@ -12803,8 +12803,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: vmv.v.x v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v8, a5 ; RV64V-NEXT: vslide1down.vx v8, v8, a6 -; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslide1down.vx v8, v8, a7 +; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64V-NEXT: addi sp, s0, -128 ; RV64V-NEXT: ld ra, 120(sp) # 8-byte Folded Reload @@ -12854,8 +12854,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12896,8 +12896,8 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -12941,8 +12941,8 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, 
v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -12986,8 +12986,8 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13031,8 +13031,8 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a7 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13074,8 +13074,8 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13120,8 +13120,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13167,8 +13167,8 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13217,8 +13217,8 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13264,8 +13264,8 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -13320,8 +13320,8 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 
-; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a3 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -13367,8 +13367,8 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index 59a0b45646dd4..bbff66e49d201 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -368,12 +368,12 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV32-NEXT: vle64.v v8, (a1) ; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: fcvt.d.w fa5, zero -; RV32-NEXT: vmfeq.vf v8, v8, fa5 -; RV32-NEXT: vmfeq.vf v0, v16, fa5 -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vle64.v v16, (a1), v0.t -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmfeq.vf v0, v8, fa5 +; RV32-NEXT: vmfeq.vf v16, v16, fa5 ; RV32-NEXT: vle64.v v8, (a0), v0.t +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vle64.v v16, (a0), v0.t ; RV32-NEXT: vse64.v v8, (a2) ; RV32-NEXT: addi a0, a2, 128 ; RV32-NEXT: vse64.v v16, (a0) @@ -386,12 +386,12 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: vmfeq.vf v8, v8, fa5 -; RV64-NEXT: vmfeq.vf v0, v16, fa5 -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vle64.v v16, (a1), v0.t -; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vmfeq.vf v0, v8, fa5 +; RV64-NEXT: vmfeq.vf v16, v16, fa5 ; RV64-NEXT: vle64.v v8, (a0), v0.t +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vle64.v v16, (a0), v0.t ; RV64-NEXT: vse64.v v8, (a2) ; RV64-NEXT: addi a0, a2, 128 ; RV64-NEXT: vse64.v v16, (a0) @@ -432,12 +432,12 @@ define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: fmv.w.x fa5, zero -; CHECK-NEXT: vmfeq.vf v8, v8, fa5 -; CHECK-NEXT: vmfeq.vf v0, v16, fa5 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1), v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmfeq.vf v0, v8, fa5 +; CHECK-NEXT: vmfeq.vf v16, v16, fa5 ; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 ; CHECK-NEXT: vse32.v v16, (a0) @@ -459,12 +459,12 @@ define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle16.v v8, (a1) ; CHECK-NEXT: vle16.v v16, (a3) ; CHECK-NEXT: fmv.h.x fa5, zero -; CHECK-NEXT: vmfeq.vf v8, v8, fa5 -; CHECK-NEXT: vmfeq.vf v0, v16, fa5 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle16.v v16, (a1), v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmfeq.vf v0, v8, fa5 +; CHECK-NEXT: vmfeq.vf v16, v16, fa5 ; CHECK-NEXT: vle16.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vmv1r.v v0, v16 +; 
CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 ; CHECK-NEXT: vse16.v v16, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index b499709711b8c..7e825b5fcfe52 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -401,18 +401,18 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi a3, a1, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v16, (a3) -; RV32-NEXT: vle64.v v8, (a1) +; RV32-NEXT: vle64.v v8, (a3) +; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v24, 0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v8, v24 ; RV32-NEXT: vmseq.vv v0, v16, v24 -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vle64.v v16, (a1), v0.t -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmseq.vv v16, v8, v24 ; RV32-NEXT: vle64.v v8, (a0), v0.t +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vle64.v v16, (a0), v0.t ; RV32-NEXT: vse64.v v8, (a2) ; RV32-NEXT: addi a0, a2, 128 ; RV32-NEXT: vse64.v v16, (a0) @@ -424,12 +424,12 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: vle64.v v16, (a3) -; RV64-NEXT: vmseq.vi v8, v8, 0 -; RV64-NEXT: vmseq.vi v0, v16, 0 -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vle64.v v16, (a1), v0.t -; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vmseq.vi v16, v16, 0 ; RV64-NEXT: vle64.v v8, (a0), v0.t +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vle64.v v16, (a0), v0.t ; RV64-NEXT: vse64.v v8, (a2) ; RV64-NEXT: addi a0, a2, 128 ; RV64-NEXT: vse64.v v16, (a0) @@ -486,12 +486,12 @@ define void @masked_load_v64i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle32.v v16, (a3) -; CHECK-NEXT: vmseq.vi v8, v8, 0 -; CHECK-NEXT: vmseq.vi v0, v16, 0 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1), v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vle32.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 ; CHECK-NEXT: vse32.v v16, (a0) @@ -530,12 +530,12 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vsetvli zero, a4, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle8.v v16, (a3) -; CHECK-NEXT: vmseq.vi v8, v8, 0 -; CHECK-NEXT: vmseq.vi v0, v16, 0 -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle8.v v16, (a1), v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vle8.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vle8.v v16, (a0), v0.t ; CHECK-NEXT: vse8.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 ; CHECK-NEXT: vse8.v v16, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index fa259f832bc23..7b9a1d88fa9a0 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -364,36 +364,38 @@ define void @masked_store_v32f64(<32 x double>* %val_ptr, <32 x double>* %a, <32 ; RV32-LABEL: masked_store_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v16, (a2) -; RV32-NEXT: addi a2, a2, 128 +; RV32-NEXT: addi a3, a2, 128 +; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: vle64.v v8, (a2) ; RV32-NEXT: fcvt.d.w fa5, zero ; RV32-NEXT: vmfeq.vf v0, v16, fa5 +; RV32-NEXT: vmv1r.v v7, v0 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: vmfeq.vf v8, v8, fa5 +; RV32-NEXT: vmfeq.vf v0, v8, fa5 ; RV32-NEXT: vse64.v v16, (a1), v0.t ; RV32-NEXT: addi a0, a1, 128 -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: vse64.v v24, (a0), v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: masked_store_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v16, (a2) -; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: addi a3, a2, 128 +; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: fmv.d.x fa5, zero ; RV64-NEXT: vmfeq.vf v0, v16, fa5 +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vmfeq.vf v8, v8, fa5 +; RV64-NEXT: vmfeq.vf v0, v8, fa5 ; RV64-NEXT: vse64.v v16, (a1), v0.t ; RV64-NEXT: addi a0, a1, 128 -; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: vse64.v v24, (a0), v0.t ; RV64-NEXT: ret %m = load <32 x double>, <32 x double>* %m_ptr @@ -428,18 +430,19 @@ define void @masked_store_v64f32(<64 x float>* %val_ptr, <64 x float>* %a, <64 x ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: fmv.w.x fa5, zero ; CHECK-NEXT: vmfeq.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vmfeq.vf v8, v8, fa5 +; CHECK-NEXT: vmfeq.vf v0, v8, fa5 ; CHECK-NEXT: vse32.v v16, (a1), v0.t ; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vse32.v v24, (a0), v0.t ; CHECK-NEXT: ret %m = load <64 x float>, <64 x float>* %m_ptr @@ -455,18 +458,19 @@ define void @masked_store_v128f16(<128 x half>* %val_ptr, <128 x half>* %a, <128 ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v16, (a2) -; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: vle16.v v16, (a3) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: fmv.h.x fa5, zero ; CHECK-NEXT: vmfeq.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v24, (a0) -; CHECK-NEXT: vmfeq.vf v8, v8, fa5 +; CHECK-NEXT: vmfeq.vf v0, v8, fa5 ; CHECK-NEXT: vse16.v v16, (a1), v0.t ; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vse16.v v24, (a0), v0.t ; CHECK-NEXT: ret %m = load <128 x half>, <128 x half>* %m_ptr diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index adcbbd9f4070a..9db2a6b6f61f6 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -401,61 +401,60 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: li a4, 10 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: addi a3, a2, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v0, (a3) -; RV32-NEXT: vle64.v v8, (a2) +; RV32-NEXT: vle64.v v8, (a3) +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v8, v8, v24 +; RV32-NEXT: vmseq.vv v0, v24, v16 ; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, 128 -; RV32-NEXT: vle64.v v16, (a2) +; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: vmseq.vv v0, v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmseq.vv v0, v0, v16 ; RV32-NEXT: addi a0, a1, 128 -; RV32-NEXT: vse64.v v16, (a0), v0.t +; RV32-NEXT: vse64.v v24, (a0), v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vse64.v v8, (a1), v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: masked_store_v32i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a2) -; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: addi a3, a2, 128 +; RV64-NEXT: vle64.v v8, (a3) ; RV64-NEXT: vle64.v v16, (a2) -; RV64-NEXT: addi a2, sp, 16 -; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vmv1r.v v7, v0 +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vi v8, v8, 0 -; RV64-NEXT: vse64.v v24, (a1), v0.t +; RV64-NEXT: vle64.v v24, (a0) +; RV64-NEXT: vmseq.vi v0, v16, 0 +; RV64-NEXT: vse64.v v8, (a1), v0.t ; RV64-NEXT: addi a0, a1, 128 -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vse64.v v16, (a0), v0.t -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: vse64.v v24, (a0), v0.t ; RV64-NEXT: ret %m = load <32 x i64>, ptr %m_ptr %mask = icmp eq <32 x i64> %m, zeroinitializer @@ -504,32 +503,21 @@ declare void @llvm.masked.store.v64i16.p0(<64 x i16>, ptr, i32, <64 x i1>) define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-LABEL: masked_store_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: sub sp, sp, a3 
; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: vle32.v v8, (a3) ; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vi v8, v8, 0 -; CHECK-NEXT: vse32.v v24, (a1), v0.t +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vmseq.vi v0, v16, 0 +; CHECK-NEXT: vse32.v v8, (a1), v0.t ; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vse32.v v16, (a0), v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vse32.v v24, (a0), v0.t ; CHECK-NEXT: ret %m = load <64 x i32>, ptr %m_ptr %mask = icmp eq <64 x i32> %m, zeroinitializer @@ -560,32 +548,21 @@ declare void @llvm.masked.store.v128i8.p0(<128 x i8>, ptr, i32, <128 x i1>) define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-LABEL: masked_store_v128i16: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v8, (a2) -; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: vle16.v v8, (a3) ; CHECK-NEXT: vle16.v v16, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vi v8, v8, 0 -; CHECK-NEXT: vse16.v v24, (a1), v0.t +; CHECK-NEXT: vle16.v v24, (a0) +; CHECK-NEXT: vmseq.vi v0, v16, 0 +; CHECK-NEXT: vse16.v v8, (a1), v0.t ; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vse16.v v16, (a0), v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vse16.v v24, (a0), v0.t ; CHECK-NEXT: ret %m = load <128 x i16>, ptr %m_ptr %mask = icmp eq <128 x i16> %m, zeroinitializer @@ -598,32 +575,21 @@ declare void @llvm.masked.store.v128i16.p0(<128 x i16>, ptr, i32, <128 x i1>) define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-LABEL: masked_store_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 3 -; CHECK-NEXT: sub sp, sp, a3 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a2) -; CHECK-NEXT: addi a2, a2, 128 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: vle8.v v8, (a3) ; CHECK-NEXT: vle8.v v16, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle8.v 
v16, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vi v8, v8, 0 -; CHECK-NEXT: vse8.v v24, (a1), v0.t +; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: vmseq.vi v0, v16, 0 +; CHECK-NEXT: vse8.v v8, (a1), v0.t ; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vse8.v v16, (a0), v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vse8.v v24, (a0), v0.t ; CHECK-NEXT: ret %m = load <256 x i8>, ptr %m_ptr %mask = icmp eq <256 x i8> %m, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index efa2a8c2e41b0..f84f79ee6e00f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -555,20 +555,9 @@ declare <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -577,49 +566,32 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmv1r.v v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; 
CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv.v.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 9df160bf30f00..c83ce798a2f6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -179,24 +179,24 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: vslidedown.vi v25, v0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB8_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmv.s.f v25, fa0 +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v25, v8, v25, v0.t +; CHECK-NEXT: vfredusum.vs v24, v8, v24, v0.t ; CHECK-NEXT: addi a1, a0, -32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vfredusum.vs v24, v16, v24, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v24 ; CHECK-NEXT: ret %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl) ret float %r @@ -207,24 +207,24 @@ define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: vslidedown.vi v25, v0, 4 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB9_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmv.s.f v25, fa0 +; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vfredosum.vs v25, v8, v25, v0.t +; CHECK-NEXT: vfredosum.vs v24, v8, v24, v0.t ; CHECK-NEXT: addi a1, a0, -32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t -; CHECK-NEXT: vfmv.f.s fa0, v25 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vfredosum.vs v24, v16, v24, v0.t +; CHECK-NEXT: vfmv.f.s fa0, v24 ; CHECK-NEXT: ret %r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl) ret float %r diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 6d7d53689d097..b957f677d1c58 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1780,55 +1780,26 @@ declare float @llvm.vector.reduce.fminimum.v64f32(<64 x float>) define float @vreduce_fminimum_v64f32(ptr %x) { ; CHECK-LABEL: vreduce_fminimum_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v8, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB109_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 -; CHECK-NEXT: j .LBB109_3 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB109_2: ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB109_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = load <64 x float>, ptr %x %red = call float @llvm.vector.reduce.fminimum.v64f32(<64 x float> %v) @@ -1862,108 +1833,118 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: addi a2, 
a0, 128 +; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a3, a2, 4 ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 3 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v24, v24, v8 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, 
sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v8, v16, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB111_2 @@ -1978,9 +1959,7 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a1, a1, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2162,54 +2141,25 @@ declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>) define double @vreduce_fminimum_v32f64(ptr %x) { ; CHECK-LABEL: vreduce_fminimum_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v8, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: 
csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB121_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI121_0) ; CHECK-NEXT: fld fa0, %lo(.LCPI121_0)(a0) -; CHECK-NEXT: j .LBB121_3 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB121_2: ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB121_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v) @@ -2242,107 +2192,117 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 4 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 3 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, 
(a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v24, v24, v8 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v8, v16, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded 
Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB123_2 @@ -2357,9 +2317,7 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a1, a1, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2578,55 +2536,26 @@ declare float @llvm.vector.reduce.fmaximum.v64f32(<64 x float>) define float @vreduce_fmaximum_v64f32(ptr %x) { ; CHECK-LABEL: vreduce_fmaximum_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v8, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB135_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: fmv.w.x fa0, a0 -; CHECK-NEXT: j .LBB135_3 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB135_2: ; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB135_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = load <64 x float>, ptr %x %red = call float @llvm.vector.reduce.fmaximum.v64f32(<64 x float> %v) @@ -2660,108 +2589,118 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; 
CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle32.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a3, a2, 4 ; CHECK-NEXT: add a2, a3, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 3 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; 
CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v24, v24, v8 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v8, v16, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB137_2 @@ -2776,9 +2715,7 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a1, a1, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2960,54 +2897,25 @@ declare double @llvm.vector.reduce.fmaximum.v32f64(<32 x double>) define double @vreduce_fmaximum_v32f64(ptr %x) { ; CHECK-LABEL: vreduce_fmaximum_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 
-; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a1, a1, a2 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v8, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB147_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI147_0) ; CHECK-NEXT: fld fa0, %lo(.LCPI147_0)(a0) -; CHECK-NEXT: j .LBB147_3 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB147_2: ; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB147_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v) @@ -3040,107 +2948,117 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: mv a2, a1 -; CHECK-NEXT: slli a1, a1, 2 -; CHECK-NEXT: add a2, a2, a1 -; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 4 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: mv a3, a2 +; 
CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 3 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v24, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v24, v24, v8 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v8, v16, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v8, 
(a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a1, a0, 4 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB149_2 @@ -3155,9 +3073,7 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 2 -; CHECK-NEXT: add a1, a1, a0 -; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index bd3e3f88c3ea1..baff2e99126f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -843,24 +843,24 @@ define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: vslidedown.vi v25, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB49_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB49_2: ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.s.x v25, a0 +; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vredxor.vs v25, v8, v25, v0.t +; CHECK-NEXT: vredxor.vs v24, v8, v24, v0.t ; CHECK-NEXT: addi a0, a1, -32 ; CHECK-NEXT: sltu a1, a1, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t -; CHECK-NEXT: vmv.x.s a0, v25 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vredxor.vs v24, v16, v24, v0.t +; CHECK-NEXT: vmv.x.s a0, v24 ; CHECK-NEXT: ret %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl) ret i32 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index b56b459535454..257cffa8ea13c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -503,20 +503,9 @@ declare <32 x double> @llvm.vp.rint.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -525,42 +514,28 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.rint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index cd196c3b903d5..66bc4522739f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -719,20 +719,9 @@ declare <32 x 
double> @llvm.vp.round.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -741,46 +730,32 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.round.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 6e459a7539b40..59923dd8964c0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -719,20 
+719,9 @@ declare <32 x double> @llvm.vp.roundeven.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -741,46 +730,32 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.roundeven.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index c04f205a78bbc..0439d0b631fc6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -719,20 +719,9 @@ declare <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -741,46 +730,32 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.roundtozero.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 4e5e210bc4d67..ec33a7079262a 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -1152,12 +1152,12 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFH-NEXT: addi a1, a1, -1 ; ZVFH-NEXT: and a0, a1, a0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: addi a0, sp, 16 ; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v0, v16, v8, v0.t +; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v8, v16, v8, v0.t ; ZVFH-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH-NEXT: vslideup.vi v24, v0, 8 +; ZVFH-NEXT: vslideup.vi v24, v8, 8 ; ZVFH-NEXT: vmv.v.v v0, v24 ; ZVFH-NEXT: csrr a0, vlenb ; ZVFH-NEXT: slli a0, a0, 3 @@ -2825,19 +2825,19 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB87_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v24, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v16, v24, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v0, 2 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vslideup.vi v24, v8, 2 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index e89283d1968ef..65d9dd0112748 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -598,7 +598,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 128 @@ -618,7 +618,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB51_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t @@ -635,7 +635,7 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -650,7 +650,7 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB52_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; 
CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret @@ -663,7 +663,7 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -678,7 +678,7 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB53_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret @@ -1214,12 +1214,12 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmseq.vv v8, v16, v8, v0.t ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v24, v0, 4 +; CHECK-NEXT: vslideup.vi v24, v8, 4 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll index 038fead011d89..ab24c67ed1914 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-transpose.ll @@ -20,9 +20,9 @@ define <8 x i8> @trn2.v8i8(<8 x i8> %v0, <8 x i8> %v1) { ; CHECK-LABEL: trn2.v8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> @@ -75,8 +75,8 @@ define <4 x i16> @trn2.v4i16(<4 x i16> %v0, <4 x i16> %v1) { ; CHECK-LABEL: trn2.v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i16> %v0, <4 x i16> %v1, <4 x i32> @@ -99,9 +99,9 @@ define <8 x i16> @trn2.v8i16(<8 x i16> %v0, <8 x i16> %v1) { ; CHECK-LABEL: trn2.v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> @@ -122,8 +122,8 @@ define <2 x i32> @trn2.v2i32(<2 x i32> %v0, <2 x i32> %v1) { ; CHECK-LABEL: trn2.v2i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x i32> %v0, <2 x i32> %v1, <2 x i32> @@ -146,8 +146,8 @@ define <4 x i32> 
@trn2.v4i32(<4 x i32> %v0, <4 x i32> %v1) { ; CHECK-LABEL: trn2.v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <4 x i32> @@ -168,8 +168,8 @@ define <2 x i64> @trn2.v2i64(<2 x i64> %v0, <2 x i64> %v1) { ; CHECK-LABEL: trn2.v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x i64> %v0, <2 x i64> %v1, <2 x i32> @@ -190,8 +190,8 @@ define <2 x float> @trn2.v2f32(<2 x float> %v0, <2 x float> %v1) { ; CHECK-LABEL: trn2.v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x float> %v0, <2 x float> %v1, <2 x i32> @@ -214,8 +214,8 @@ define <4 x float> @trn2.v4f32(<4 x float> %v0, <4 x float> %v1) { ; CHECK-LABEL: trn2.v4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x float> %v0, <4 x float> %v1, <4 x i32> @@ -236,8 +236,8 @@ define <2 x double> @trn2.v2f64(<2 x double> %v0, <2 x double> %v1) { ; CHECK-LABEL: trn2.v2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vrgather.vi v10, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <2 x double> %v0, <2 x double> %v1, <2 x i32> @@ -260,8 +260,8 @@ define <4 x half> @trn2.v4f16(<4 x half> %v0, <4 x half> %v1) { ; CHECK-LABEL: trn2.v4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.i v0, 10 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <4 x half> %v0, <4 x half> %v1, <4 x i32> @@ -284,9 +284,9 @@ define <8 x half> @trn2.v8f16(<8 x half> %v0, <8 x half> %v1) { ; CHECK-LABEL: trn2.v8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: li a0, 170 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0 ; CHECK-NEXT: ret %tmp0 = shufflevector <8 x half> %v0, <8 x half> %v1, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 6a8d2008de74d..c8ac148ad91c9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -469,7 +469,6 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x ; CHECK-LABEL: strided_vpload_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: bltu a2, a4, .LBB40_2 ; CHECK-NEXT: # %bb.1: @@ -482,7 +481,8 @@ define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a5 ; 
CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v9, 2 +; CHECK-NEXT: vslidedown.vi v8, v0, 2 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vlse64.v v16, (a4), a1, v0.t @@ -525,7 +525,7 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-LABEL: strided_load_v33f64: ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: li a5, 32 -; CHECK-RV32-NEXT: vmv1r.v v8, v0 +; CHECK-RV32-NEXT: vmv1r.v v16, v0 ; CHECK-RV32-NEXT: mv a3, a4 ; CHECK-RV32-NEXT: bltu a4, a5, .LBB42_2 ; CHECK-RV32-NEXT: # %bb.1: @@ -543,9 +543,9 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: li a7, 16 ; CHECK-RV32-NEXT: .LBB42_4: ; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-RV32-NEXT: vslidedown.vi v0, v16, 4 ; CHECK-RV32-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v16, (a6), a2, v0.t +; CHECK-RV32-NEXT: vlse64.v v8, (a6), a2, v0.t ; CHECK-RV32-NEXT: addi a6, a3, -16 ; CHECK-RV32-NEXT: sltu a3, a3, a6 ; CHECK-RV32-NEXT: addi a3, a3, -1 @@ -557,25 +557,27 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: mul a5, a4, a2 ; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV32-NEXT: vslidedown.vi v9, v16, 2 +; CHECK-RV32-NEXT: vmv1r.v v10, v16 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV32-NEXT: vmv1r.v v0, v9 +; CHECK-RV32-NEXT: vlse64.v v16, (a5), a2, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-RV32-NEXT: vmv1r.v v0, v8 -; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t +; CHECK-RV32-NEXT: vmv1r.v v0, v10 +; CHECK-RV32-NEXT: vlse64.v v24, (a1), a2, v0.t ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV32-NEXT: vse64.v v8, (a0) +; CHECK-RV32-NEXT: vse64.v v24, (a0) ; CHECK-RV32-NEXT: addi a1, a0, 128 -; CHECK-RV32-NEXT: vse64.v v24, (a1) +; CHECK-RV32-NEXT: vse64.v v16, (a1) ; CHECK-RV32-NEXT: addi a0, a0, 256 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vse64.v v16, (a0) +; CHECK-RV32-NEXT: vse64.v v8, (a0) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_v33f64: ; CHECK-RV64: # %bb.0: ; CHECK-RV64-NEXT: li a5, 32 -; CHECK-RV64-NEXT: vmv1r.v v8, v0 +; CHECK-RV64-NEXT: vmv1r.v v16, v0 ; CHECK-RV64-NEXT: mv a4, a3 ; CHECK-RV64-NEXT: bltu a3, a5, .LBB42_2 ; CHECK-RV64-NEXT: # %bb.1: @@ -593,9 +595,9 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: li a7, 16 ; CHECK-RV64-NEXT: .LBB42_4: ; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 4 +; CHECK-RV64-NEXT: vslidedown.vi v0, v16, 4 ; CHECK-RV64-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v16, (a6), a2, v0.t +; CHECK-RV64-NEXT: vlse64.v v8, (a6), a2, v0.t ; CHECK-RV64-NEXT: addi a6, a4, -16 ; CHECK-RV64-NEXT: sltu a4, a4, a6 ; CHECK-RV64-NEXT: addi a4, a4, -1 @@ -607,19 +609,21 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: mul a5, a3, a2 ; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-RV64-NEXT: vslidedown.vi v9, v16, 2 +; CHECK-RV64-NEXT: vmv1r.v v10, v16 ; 
CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV64-NEXT: vmv1r.v v0, v9 +; CHECK-RV64-NEXT: vlse64.v v16, (a5), a2, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV64-NEXT: vmv1r.v v0, v8 -; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t +; CHECK-RV64-NEXT: vmv1r.v v0, v10 +; CHECK-RV64-NEXT: vlse64.v v24, (a1), a2, v0.t ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-RV64-NEXT: vse64.v v8, (a0) +; CHECK-RV64-NEXT: vse64.v v24, (a0) ; CHECK-RV64-NEXT: addi a1, a0, 128 -; CHECK-RV64-NEXT: vse64.v v24, (a1) +; CHECK-RV64-NEXT: vse64.v v16, (a1) ; CHECK-RV64-NEXT: addi a0, a0, 256 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV64-NEXT: vse64.v v16, (a0) +; CHECK-RV64-NEXT: vse64.v v8, (a0) ; CHECK-RV64-NEXT: ret %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl) ret <33 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index 9ce045462bccc..84412a7d075c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -222,283 +222,6 @@ define <2 x i32> @vtrunc_v2i32_v2i64_unmasked(<2 x i64> %a, i32 zeroext %vl) { declare <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64>, <128 x i1>, i32) define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 zeroext %vl) { -; CHECK-LABEL: vtrunc_v128i32_v128i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 6 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 40 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v5, v0, 8 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v0, 4 -; CHECK-NEXT: addi a3, a1, 128 -; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a2, a2, a4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v27, v5, 4 -; CHECK-NEXT: addi a2, a1, 640 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a2) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: addi a2, a7, -64 -; CHECK-NEXT: sltu a4, a7, a2 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a4, a4, a2 -; CHECK-NEXT: addi a2, a4, -32 -; CHECK-NEXT: sltu a5, a4, a2 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a5, a5, a2 -; CHECK-NEXT: addi a2, a5, -16 -; CHECK-NEXT: sltu a6, a5, a2 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a2, a6, a2 -; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, 
ma -; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: li t0, 24 -; CHECK-NEXT: mul a6, a6, t0 -; CHECK-NEXT: add a6, sp, a6 -; CHECK-NEXT: addi a6, a6, 16 -; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a5, a2, .LBB16_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a5, 16 -; CHECK-NEXT: .LBB16_2: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v28, v26, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: li a3, 64 -; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a6, 64 -; CHECK-NEXT: .LBB16_4: -; CHECK-NEXT: addi a5, a1, 384 -; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: addi t0, a6, -32 -; CHECK-NEXT: sltu a6, a6, t0 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, t0 -; CHECK-NEXT: addi t0, a6, -16 -; CHECK-NEXT: sltu t1, a6, t0 -; CHECK-NEXT: addi t1, t1, -1 -; CHECK-NEXT: and t0, t1, t0 -; CHECK-NEXT: vsetvli zero, t0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: addi t0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (t0) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 4 -; CHECK-NEXT: add t0, sp, t0 -; CHECK-NEXT: addi t0, t0, 16 -; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a6, a2, .LBB16_6 -; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: li a6, 16 -; CHECK-NEXT: .LBB16_6: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v20, v5, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) -; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 48 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: mv a5, a4 -; CHECK-NEXT: bltu a4, a3, .LBB16_8 -; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a5, 32 -; CHECK-NEXT: .LBB16_8: -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: addi a1, a5, -16 -; CHECK-NEXT: sltu a5, a5, a1 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a1, a5, a1 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v20 -; 
CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: bltu a4, a2, .LBB16_10 -; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: .LBB16_10: -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v6, v7, 2 -; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: mv a1, a7 -; CHECK-NEXT: bltu a7, a3, .LBB16_12 -; CHECK-NEXT: # %bb.11: -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 56 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v24, 16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 56 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 4 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v24, v8 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 48 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; CHECK-NEXT: vslideup.vi v8, v24, 16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 48 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vmv4r.v v8, v0 -; CHECK-NEXT: vslideup.vi v8, v16, 16 -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a4, a4, a5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a4, a1, -16 -; CHECK-NEXT: sltu a1, a1, a4 -; CHECK-NEXT: addi a1, a1, -1 -; CHECK-NEXT: and a1, a1, a4 -; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t -; CHECK-NEXT: bltu a7, a2, .LBB16_14 -; CHECK-NEXT: # %bb.13: -; CHECK-NEXT: li a7, 16 -; CHECK-NEXT: .LBB16_14: -; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v16, v24, 0, v0.t -; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vslideup.vi v16, v8, 16 -; CHECK-NEXT: vse32.v v16, (a0) -; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 48 -; 
CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a1) -; CHECK-NEXT: addi a0, a0, 384 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 56 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 6 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %v = call <128 x i32> @llvm.vp.trunc.v128i32.v128i64(<128 x i64> %a, <128 x i1> %m, i32 %vl) ret <128 x i32> %v } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll index 70b547759938f..920e0b66af753 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vaaddu.ll @@ -163,12 +163,12 @@ define <8 x i1> @vaaddu_vv_v8i1_floor(<8 x i1> %x, <8 x i1> %y) { ; CHECK-LABEL: vaaddu_vv_v8i1_floor: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrwi vxrm, 2 -; CHECK-NEXT: vaaddu.vv v8, v10, v8 +; CHECK-NEXT: vaaddu.vv v8, v9, v8 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -408,12 +408,12 @@ define <8 x i1> @vaaddu_vv_v8i1_ceil(<8 x i1> %x, <8 x i1> %y) { ; CHECK-LABEL: vaaddu_vv_v8i1_ceil: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v10, v9, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrwi vxrm, 0 -; CHECK-NEXT: vaaddu.vv v8, v10, v8 +; CHECK-NEXT: vaaddu.vv v8, v9, v8 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 2c62cbd583d00..0df1702f7acbe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -363,7 +363,7 @@ declare <256 x i8> @llvm.vp.add.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) @@ -378,7 +378,7 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.add.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll index 507cf5cc6b80c..fac3ad109b3c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll 
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1139,17 +1139,17 @@ define <11 x i64> @vand_vv_v11i64_unmasked(<11 x i64> %va, <11 x i64> %b, i32 ze define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vand_vx_v11i64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v16, v0 +; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: lui a3, 341 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vmv.s.x v0, a3 -; RV32-NEXT: vmv.v.x v24, a1 -; RV32-NEXT: vmerge.vxm v24, v24, a0, v0 +; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: lui a1, 341 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: vmerge.vxm v16, v16, a0, v0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v16 -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vand_vx_v11i64: @@ -1168,10 +1168,10 @@ define <11 x i64> @vand_vx_v11i64_unmasked(<11 x i64> %va, i64 %b, i32 zeroext % ; RV32: # %bb.0: ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: lui a3, 341 -; RV32-NEXT: addi a3, a3, 1365 -; RV32-NEXT: vmv.s.x v0, a3 ; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: lui a1, 341 +; RV32-NEXT: addi a1, a1, 1365 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vmerge.vxm v16, v16, a0, v0 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index 01b07b4081e6d..10dc46a063ecc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -323,9 +323,9 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index 0574773fb2fd9..5cd1771e7ac4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -890,7 +890,6 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -902,6 +901,7 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll index ffa88e28d7dc8..1050ca033a0af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -415,9 +415,9 @@ define <32 x double> @vfmax_vv_v32f64(<32 x 
double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll index 17f851e172f81..e6f408d7e49b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -415,9 +415,9 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index 288efb0f1fc27..1835f9238f827 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -654,7 +654,6 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -666,6 +665,7 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index 3db44e87109bd..aa283c02b67c2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -268,7 +268,7 @@ declare <256 x i8> @llvm.vp.smax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -283,7 +283,7 @@ define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index c97c2232715f5..bb432be7d47e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -267,7 +267,7 @@ declare <256 x i8> @llvm.vp.umax.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) define <256 x i8> 
@vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -282,7 +282,7 @@ define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index eaa19110a2a28..24e81df404c94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -268,7 +268,7 @@ declare <256 x i8> @llvm.vp.smin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -283,7 +283,7 @@ define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 48175e5b905ba..e26089e2648b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -267,7 +267,7 @@ declare <256 x i8> @llvm.vp.umin.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, i32) define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vminu_vx_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -282,7 +282,7 @@ define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index a13f1eed8efb1..67570f8ccff7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -291,16 +291,16 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a1, a1, a2 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; 
RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 @@ -2444,7 +2444,6 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v7, v0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vnsrl.wi v16, v8, 0 @@ -2459,6 +2458,7 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t @@ -2468,7 +2468,7 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB96_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 9ef89352e65e5..2124e103e0191 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -395,7 +395,7 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_v33f64: ; CHECK: # %bb.0: ; CHECK-NEXT: li a4, 32 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: bltu a2, a4, .LBB32_2 ; CHECK-NEXT: # %bb.1: @@ -404,40 +404,40 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: addi a4, a3, -16 ; CHECK-NEXT: sltu a3, a3, a4 ; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: and a5, a3, a4 ; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: addi a3, a2, -32 -; CHECK-NEXT: sltu a4, a2, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a4, a4, a3 +; CHECK-NEXT: sltu a6, a2, a3 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a6, a6, a3 ; CHECK-NEXT: li a3, 16 -; CHECK-NEXT: bltu a4, a3, .LBB32_4 +; CHECK-NEXT: vslidedown.vi v0, v24, 2 +; CHECK-NEXT: bltu a6, a3, .LBB32_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: li a4, 16 +; CHECK-NEXT: li a6, 16 ; CHECK-NEXT: .LBB32_4: -; CHECK-NEXT: addi a5, a1, 256 +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a4), v0.t +; CHECK-NEXT: addi a4, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a5), v0.t +; CHECK-NEXT: vslidedown.vi v0, v24, 4 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB32_6: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; 
CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vle64.v v8, (a1), v0.t +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vle64.v v24, (a1), v0.t ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: vse64.v v24, (a0) ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vse64.v v16, (a1) +; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: addi a0, a0, 256 ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-NEXT: vse64.v v24, (a0) +; CHECK-NEXT: vse64.v v16, (a0) ; CHECK-NEXT: ret %load = call <33 x double> @llvm.vp.load.v33f64.p0(ptr %ptr, <33 x i1> %m, i32 %evl) ret <33 x double> %load diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index cd9a38d5167d5..46bbb76eefe87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1851,28 +1851,28 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v0, v24 +; RV64-NEXT: vsext.vf2 v16, v24 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v24, 16 +; RV64-NEXT: vslidedown.vi v8, v24, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf2 v16, v24 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vsext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB81_2 ; RV64-NEXT: # %bb.1: @@ -1880,8 +1880,8 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: .LBB81_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v16, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 @@ -1890,13 +1890,13 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; 
RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1938,28 +1938,28 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 -; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: slli a3, a3, 4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v0, v24 +; RV64-NEXT: vzext.vf2 v16, v24 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v24, 16 +; RV64-NEXT: vslidedown.vi v8, v24, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vzext.vf2 v16, v24 -; RV64-NEXT: vsll.vi v16, v16, 3 +; RV64-NEXT: vzext.vf2 v24, v8 +; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsll.vi v24, v0, 3 +; RV64-NEXT: vsll.vi v24, v16, 3 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB82_2 ; RV64-NEXT: # %bb.1: @@ -1967,8 +1967,8 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: .LBB82_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v16, (a0), v24, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 @@ -1977,13 +1977,13 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 10 -; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 291629de6dcfa..889ecd4d06c37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -372,7 +372,7 @@ declare <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vi_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, 
v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) @@ -387,7 +387,7 @@ define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsadd.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.sadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index d38ee1148e894..411bead6b8dec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -368,7 +368,7 @@ declare <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsaddu_vi_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) @@ -383,7 +383,7 @@ define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsaddu.vi v8, v8, -1, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.uadd.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index a454e1dbc967c..cb502dea1028d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -158,48 +158,38 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v9, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmv1r.v v7, v8 +; CHECK-NEXT: vmv1r.v v6, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: addi a0, a1, 128 -; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a3, -128 ; CHECK-NEXT: sltu a4, a3, a0 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vle8.v v0, (a1) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle8.v v16, (a1) ; CHECK-NEXT: and a0, a4, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vvm v24, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: 
bltu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB11_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -221,38 +211,39 @@ define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v8 +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vle8.v v16, (a1) ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 7dcd4c4199827..a1596e2080b36 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -423,8 +423,8 @@ define void @vselect_vv_v8i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -440,8 +440,8 @@ define void @vselect_vx_v8i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: 
vselect_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -458,8 +458,8 @@ define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -474,8 +474,8 @@ define void @vselect_vv_v8f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -491,8 +491,8 @@ define void @vselect_vx_v8f32(float %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -509,8 +509,8 @@ define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vfpzero_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -525,8 +525,8 @@ define void @vselect_vv_v16i16(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -542,8 +542,8 @@ define void @vselect_vx_v16i16(i16 signext %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -560,8 +560,8 @@ define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vi_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 4, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret @@ -577,8 +577,8 @@ define void @vselect_vv_v32f16(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a4, 32 ; CHECK-NEXT: vsetvli zero, a4, e16, m4, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -595,8 +595,8 @@ define void @vselect_vx_v32f16(half %a, ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret @@ -614,8 +614,8 @@ define void 
@vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 2caa2ff41a7d9..5df2db29f2ca5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -384,7 +384,7 @@ declare <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vi_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) @@ -400,7 +400,7 @@ define <256 x i8> @vssub_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -437,11 +437,11 @@ define <256 x i8> @vssub_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vlm.v v25, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -455,11 +455,11 @@ define <256 x i8> @vssub_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vlm.v v25, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssub.vx v8, v8, a0, v0.t ; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vssub.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.ssub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index 6313f31bc1a61..2ae7a965ab513 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -379,7 +379,7 @@ declare <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8>, <256 x i8>, <256 x i1>, define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vi_v258i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) @@ -395,7 +395,7 @@ define <256 x i8> @vssubu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: 
vmv1r.v v0, v25 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 %evl) @@ -432,11 +432,11 @@ define <256 x i8> @vssubu_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vlm.v v25, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 129) @@ -450,11 +450,11 @@ define <256 x i8> @vssubu_vi_v258i8_evl128(<256 x i8> %va, <256 x i1> %m) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vlm.v v24, (a0) +; CHECK-NEXT: vlm.v v25, (a0) ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vssubu.vx v8, v8, a0, v0.t ; CHECK-NEXT: vsetivli zero, 0, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vssubu.vx v16, v16, a0, v0.t ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.usub.sat.v258i8(<256 x i8> %va, <256 x i8> splat (i8 -1), <256 x i1> %m, i32 128) diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index b5346b355743c..59798147555dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -682,18 +682,10 @@ declare @llvm.vp.floor.nxv16f64(, < define @vp_floor_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -702,49 +694,32 @@ define @vp_floor_nxv16f64( %va, @llvm.vp.floor.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 13107103a5ecf..24ce8a4071d59 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -23,11 +23,10 @@ define @vfmax_nxv1f16_vv( %a, @vfmax_nxv1f16_vv( %a, @vfmax_nxv2f16_vv( %a, @vfmax_nxv2f16_vv( %a, @vfmax_nxv4f16_vv( %a, @vfmax_nxv4f16_vv( %a, @vfmax_nxv8f16_vv( %a, @vfmax_nxv8f16_vv( %a, @vfmax_nxv16f16_vv( %a, @llvm.maximum.nxv16f16( %a, %b) ret %v @@ -209,35 +176,22 @@ declare @llvm.maximum.nxv32f16(, @vfmax_nxv32f16_vv( %a, %b) nounwind { ; ZVFH-LABEL: vfmax_nxv32f16_vv: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: sub sp, sp, a0 ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v24, v16, v16 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: 
vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v24 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_nxv32f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 18 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: sub sp, sp, a0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -247,63 +201,43 @@ define @vfmax_nxv32f16_vv( %a, @vfmax_nxv1f32_vv( %a, @llvm.maximum.nxv1f32( %a, %b) ret %v @@ -335,11 +268,10 @@ define @vfmax_nxv2f32_vv( %a, @llvm.maximum.nxv2f32( %a, %b) ret %v @@ -352,9 +284,8 @@ define @vfmax_nxv4f32_vv( %a, @vfmax_nxv8f32_vv( %a, @llvm.maximum.nxv16f32(, @vfmax_nxv16f32_vv( %a, %b) nounwind { ; CHECK-LABEL: vfmax_nxv16f32_vv: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.maximum.nxv16f32( %a, %b) ret %v @@ -413,11 +332,10 @@ define @vfmax_nxv1f64_vv( %a, @llvm.maximum.nxv1f64( %a, %b) ret %v @@ -430,9 +348,8 @@ define @vfmax_nxv2f64_vv( %a, @vfmax_nxv4f64_vv( %a, @llvm.maximum.nxv8f64(, @vfmax_nxv8f64_vv( %a, %b) nounwind { ; CHECK-LABEL: vfmax_nxv8f64_vv: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.maximum.nxv8f64( %a, %b) ret %v @@ -523,16 +428,15 @@ define @vfmax_nxv1f16_vv_nnana( %a, @vfmax_nxv1f16_vv_nnanb( %a, @llvm.vp.maximum.nxv1f16(, @vfmax_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; 
ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked: @@ -66,12 +66,11 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare @llvm.vp.maximum.nxv2f16(, @vfmax_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, 
zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked: @@ -138,12 +137,11 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,33 +155,34 @@ declare @llvm.vp.maximum.nxv4f16(, @vfmax_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v14, v14, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v12, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v12, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm 
v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -196,11 +195,10 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked: @@ -212,9 +210,8 @@ define @vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v2, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v2 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -229,33 +226,34 @@ declare @llvm.vp.maximum.nxv8f16(, @vfmax_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 ; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v0, v20, v20, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v16, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -268,9 +266,8 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v2, v10, v10 ; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v2 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret @@ -284,9 +281,8 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; 
ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v4 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma @@ -301,31 +297,32 @@ declare @llvm.vp.maximum.nxv16f16(, @vfmax_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vmv1r.v v20, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v v0, v20 ; ZVFH-NEXT: vmfeq.vv v0, v12, v12, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vfmax.vv v8, v8, v20, v0.t +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma @@ -340,22 +337,14 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v4, v12, v12 ; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v4 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 10 -; ZVFHMIN-NEXT: mul a1, a1, a2 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -363,29 +352,12 @@ define @vfmax_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; 
ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 10 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.maximum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -396,28 +368,15 @@ declare @llvm.vp.maximum.nxv32f16(, @vfmax_vv_nxv32f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vmfeq.vv v0, v16, v16, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: vfmax.vv v8, v8, v24, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16: @@ -425,11 +384,10 @@ define @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16_unmasked( %va, %vb, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 1 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v24, v16, v16 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v24 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16_unmasked: @@ -537,9 +513,10 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -550,22 +527,27 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v6, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -573,17 +555,13 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB11_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -591,25 +569,38 @@ define @vfmax_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v24, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: 
vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -622,15 +613,15 @@ declare @llvm.vp.maximum.nxv1f32(, @vfmax_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1f32( %va, %vb, %m, i32 %evl) ret %v @@ -641,11 +632,10 @@ define @vfmax_vv_nxv1f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -656,15 +646,15 @@ declare @llvm.vp.maximum.nxv2f32(, @vfmax_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv2f32( %va, %vb, %m, i32 %evl) ret %v @@ -675,11 +665,10 @@ define @vfmax_vv_nxv2f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ 
-690,15 +679,15 @@ declare @llvm.vp.maximum.nxv4f32(, @vfmax_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv4f32( %va, %vb, %m, i32 %evl) ret %v @@ -709,9 +698,8 @@ define @vfmax_vv_nxv4f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -724,15 +712,15 @@ declare @llvm.vp.maximum.nxv8f32(, @vfmax_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv8f32( %va, %vb, %m, i32 %evl) ret %v @@ -743,9 +731,8 @@ define @vfmax_vv_nxv8f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -758,15 +745,15 @@ declare @llvm.vp.maximum.nxv1f64(, @vfmax_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1f64( %va, %vb, %m, i32 %evl) ret %v @@ -777,11 +764,10 @@ define @vfmax_vv_nxv1f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: 
vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -792,15 +778,15 @@ declare @llvm.vp.maximum.nxv2f64(, @vfmax_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv2f64( %va, %vb, %m, i32 %evl) ret %v @@ -811,9 +797,8 @@ define @vfmax_vv_nxv2f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -826,15 +811,15 @@ declare @llvm.vp.maximum.nxv4f64(, @vfmax_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv4f64( %va, %vb, %m, i32 %evl) ret %v @@ -845,9 +830,8 @@ define @vfmax_vv_nxv4f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -860,28 +844,15 @@ declare @llvm.vp.maximum.nxv8f64(, @vfmax_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: 
vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv8f64( %va, %vb, %m, i32 %evl) ret %v @@ -890,25 +861,12 @@ define @vfmax_vv_nxv8f64( %va, @vfmax_vv_nxv8f64_unmasked( %va, %vb, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.maximum.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -922,103 +880,71 @@ define @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 4 -; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a3, a0, 3 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vfmax.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a3, a0, 3 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # 
Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a3, a0, 3 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a2, a1, .LBB29_2 ; CHECK-NEXT: # %bb.1: @@ -1090,31 +996,24 @@ define @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: .LBB29_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v24, v24 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index e5a0fdeda120f..198d3a98710d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -23,11 +23,10 @@ define @vfmin_nxv1f16_vv( %a, @vfmin_nxv1f16_vv( %a, @vfmin_nxv2f16_vv( %a, @vfmin_nxv2f16_vv( %a, @vfmin_nxv4f16_vv( %a, @vfmin_nxv4f16_vv( %a, @vfmin_nxv8f16_vv( %a, @vfmin_nxv8f16_vv( %a, @vfmin_nxv16f16_vv( %a, @llvm.minimum.nxv16f16( %a, %b) ret %v @@ -209,35 +176,22 @@ declare @llvm.minimum.nxv32f16(, @vfmin_nxv32f16_vv( %a, %b) nounwind { ; ZVFH-LABEL: vfmin_nxv32f16_vv: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: sub sp, sp, a0 ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v24, v16, v16 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v24 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_nxv32f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 18 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: sub sp, sp, a0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli 
a1, a0, 3 -; ZVFHMIN-NEXT: add a0, a1, a0 +; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -247,63 +201,43 @@ define @vfmin_nxv32f16_vv( %a, @vfmin_nxv1f32_vv( %a, @llvm.minimum.nxv1f32( %a, %b) ret %v @@ -335,11 +268,10 @@ define @vfmin_nxv2f32_vv( %a, @llvm.minimum.nxv2f32( %a, %b) ret %v @@ -352,9 +284,8 @@ define @vfmin_nxv4f32_vv( %a, @vfmin_nxv8f32_vv( %a, @llvm.minimum.nxv16f32(, @vfmin_nxv16f32_vv( %a, %b) nounwind { ; CHECK-LABEL: vfmin_nxv16f32_vv: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.minimum.nxv16f32( %a, %b) ret %v @@ -413,11 +332,10 @@ define @vfmin_nxv1f64_vv( %a, @llvm.minimum.nxv1f64( %a, %b) ret %v @@ -430,9 +348,8 @@ define @vfmin_nxv2f64_vv( %a, @vfmin_nxv4f64_vv( %a, @llvm.minimum.nxv8f64(, @vfmin_nxv8f64_vv( %a, %b) nounwind { ; CHECK-LABEL: vfmin_nxv8f64_vv: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.minimum.nxv8f64( %a, %b) ret %v @@ -523,16 +428,15 @@ define @vfmin_nxv1f16_vv_nnana( %a, @vfmin_nxv1f16_vv_nnanb( %a, @llvm.vp.minimum.nxv1f16(, @vfmin_vv_nxv1f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v 
v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: @@ -66,12 +66,11 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare @llvm.vp.minimum.nxv2f16(, @vfmin_vv_nxv2f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 
+124,10 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: @@ -138,12 +137,11 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,33 +155,34 @@ declare @llvm.vp.minimum.nxv4f16(, @vfmin_vv_nxv4f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v0, v14, v14, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v12, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v12, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -196,11 +195,10 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: 
vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: @@ -212,9 +210,8 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v2, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v2 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma @@ -229,33 +226,34 @@ declare @llvm.vp.minimum.nxv8f16(, @vfmin_vv_nxv8f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 ; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v0, v20, v20, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v16, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -268,9 +266,8 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v2, v10, v10 ; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v2 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret @@ -284,9 +281,8 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v4, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v4 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 ; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma @@ -301,31 +297,32 @@ declare @llvm.vp.minimum.nxv16f16(, @vfmin_vv_nxv16f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vmv1r.v 
v20, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v v0, v20 ; ZVFH-NEXT: vmfeq.vv v0, v12, v12, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vfmin.vv v8, v8, v20, v0.t +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma @@ -340,22 +337,14 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v4, v12, v12 ; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v4 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 10 -; ZVFHMIN-NEXT: mul a1, a1, a2 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -363,29 +352,12 @@ define @vfmin_vv_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb 
-; ZVFHMIN-NEXT: li a1, 10 -; ZVFHMIN-NEXT: mul a0, a0, a1 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.minimum.nxv16f16( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -396,28 +368,15 @@ declare @llvm.vp.minimum.nxv32f16(, @vfmin_vv_nxv32f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vmfeq.vv v0, v16, v16, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: vfmin.vv v8, v8, v24, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16: @@ -425,11 +384,10 @@ define @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16_unmasked( %va, %vb, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 1 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v24, v16, v16 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v24 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 1 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16_unmasked: @@ -537,9 +513,10 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -550,22 +527,27 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; 
ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: csrr a2, vlenb +; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: add a2, sp, a2 +; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v6, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v6 ; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t @@ -573,17 +555,13 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB11_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 -; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 @@ -591,25 +569,38 @@ define @vfmin_vv_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v24, v16, v16 +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 +; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: add a1, sp, a1 +; ZVFHMIN-NEXT: addi a1, a1, 16 +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size 
Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -622,15 +613,15 @@ declare @llvm.vp.minimum.nxv1f32(, @vfmin_vv_nxv1f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1f32( %va, %vb, %m, i32 %evl) ret %v @@ -641,11 +632,10 @@ define @vfmin_vv_nxv1f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -656,15 +646,15 @@ declare @llvm.vp.minimum.nxv2f32(, @vfmin_vv_nxv2f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv2f32( %va, %vb, %m, i32 %evl) ret %v @@ -675,11 +665,10 @@ define @vfmin_vv_nxv2f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv2f32( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -690,15 +679,15 @@ declare @llvm.vp.minimum.nxv4f32(, @vfmin_vv_nxv4f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, 
v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv4f32( %va, %vb, %m, i32 %evl) ret %v @@ -709,9 +698,8 @@ define @vfmin_vv_nxv4f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -724,15 +712,15 @@ declare @llvm.vp.minimum.nxv8f32(, @vfmin_vv_nxv8f32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv8f32( %va, %vb, %m, i32 %evl) ret %v @@ -743,9 +731,8 @@ define @vfmin_vv_nxv8f32_unmasked( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -758,15 +745,15 @@ declare @llvm.vp.minimum.nxv1f64(, @vfmin_vv_nxv1f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1f64( %va, %vb, %m, i32 %evl) ret %v @@ -777,11 +764,10 @@ define @vfmin_vv_nxv1f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv1f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -792,15 +778,15 @@ declare @llvm.vp.minimum.nxv2f64(, @vfmin_vv_nxv2f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 ; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm 
v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv2f64( %va, %vb, %m, i32 %evl) ret %v @@ -811,9 +797,8 @@ define @vfmin_vv_nxv2f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v2, v10, v10 ; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret @@ -826,15 +811,15 @@ declare @llvm.vp.minimum.nxv4f64(, @vfmin_vv_nxv4f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 ; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv4f64( %va, %vb, %m, i32 %evl) ret %v @@ -845,9 +830,8 @@ define @vfmin_vv_nxv4f64_unmasked( %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v4, v12, v12 ; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v4 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret @@ -860,28 +844,15 @@ declare @llvm.vp.minimum.nxv8f64(, @vfmin_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv8f64( %va, %vb, %m, i32 %evl) ret %v @@ -890,25 +861,12 @@ define @vfmin_vv_nxv8f64( %va, @vfmin_vv_nxv8f64_unmasked( %va, %vb, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: 
vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.minimum.nxv8f64( %va, %vb, splat (i1 true), i32 %evl) ret %v @@ -922,103 +880,71 @@ define @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 4 -; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a3, a0, 3 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vfmin.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a3, a0, 3 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a3, a0, 3 -; CHECK-NEXT: add a0, a3, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a2, a1, .LBB29_2 ; CHECK-NEXT: # %bb.1: @@ -1090,31 +996,24 @@ define @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: .LBB29_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; 
CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v24, v24 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index f90237b8d7e95..4ea9657f9b17c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -10,9 +10,9 @@ define @nearbyint_nxv1f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -33,9 +33,9 @@ define @nearbyint_nxv2f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -56,9 +56,9 @@ define @nearbyint_nxv4f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -79,9 +79,9 @@ define @nearbyint_nxv8f16( %v) strictfp { ; CHECK-LABEL: nearbyint_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -102,9 +102,9 @@ define @nearbyint_nxv16f16( %v) strictf ; CHECK-LABEL: nearbyint_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: 
vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -125,9 +125,9 @@ define @nearbyint_nxv32f16( %v) strictf ; CHECK-LABEL: nearbyint_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -263,9 +263,9 @@ define @nearbyint_nxv1f64( %v) strict ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -286,9 +286,9 @@ define @nearbyint_nxv2f64( %v) strict ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -309,9 +309,9 @@ define @nearbyint_nxv4f64( %v) strict ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -332,9 +332,9 @@ define @nearbyint_nxv8f64( %v) strict ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll index 3276f481f30ea..b3e5d46c201a7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-costrained-sdnode.ll @@ -10,9 +10,9 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-LABEL: round_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -32,9 +32,9 @@ define @round_nxv2f16( %x) strictfp { ; CHECK-LABEL: round_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -54,9 +54,9 @@ define @round_nxv4f16( %x) strictfp { ; CHECK-LABEL: round_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, 
v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,9 +76,9 @@ define @round_nxv8f16( %x) strictfp { ; CHECK-LABEL: round_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -98,9 +98,9 @@ define @round_nxv16f16( %x) strictfp { ; CHECK-LABEL: round_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -120,9 +120,9 @@ define @round_nxv32f16( %x) strictfp { ; CHECK-LABEL: round_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -252,9 +252,9 @@ define @round_nxv1f64( %x) strictfp { ; CHECK-LABEL: round_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -274,9 +274,9 @@ define @round_nxv2f64( %x) strictfp { ; CHECK-LABEL: round_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -296,9 +296,9 @@ define @round_nxv4f64( %x) strictfp { ; CHECK-LABEL: round_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -318,9 +318,9 @@ define @round_nxv8f64( %x) strictfp { ; CHECK-LABEL: round_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll index 4ebfcccbaaa6e..a3bb9cb0ec7d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -10,9 +10,9 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; 
CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -32,9 +32,9 @@ define @roundeven_nxv2f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -54,9 +54,9 @@ define @roundeven_nxv4f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -76,9 +76,9 @@ define @roundeven_nxv8f16( %x) strictfp { ; CHECK-LABEL: roundeven_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -98,9 +98,9 @@ define @roundeven_nxv16f16( %x) strictf ; CHECK-LABEL: roundeven_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -120,9 +120,9 @@ define @roundeven_nxv32f16( %x) strictf ; CHECK-LABEL: roundeven_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -252,9 +252,9 @@ define @roundeven_nxv1f64( %x) strict ; CHECK-LABEL: roundeven_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -274,9 +274,9 @@ define @roundeven_nxv2f64( %x) strict ; CHECK-LABEL: roundeven_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -296,9 +296,9 @@ define @roundeven_nxv4f64( %x) strict ; CHECK-LABEL: roundeven_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -318,9 +318,9 @@ define @roundeven_nxv8f64( %x) strict ; CHECK-LABEL: roundeven_nxv8f64: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index f9f085dcc1614..816b63edc87a8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -251,10 +251,10 @@ define @fshl_v64i8( %a, ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vsrl.vi v16, v16, 1, v0.t -; CHECK-NEXT: vnot.v v8, v24, v0.t -; CHECK-NEXT: vand.vi v8, v8, 7, v0.t -; CHECK-NEXT: vsrl.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsrl.vi v8, v16, 1, v0.t +; CHECK-NEXT: vnot.v v16, v24, v0.t +; CHECK-NEXT: vand.vi v16, v16, 7, v0.t +; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t ; CHECK-NEXT: vand.vi v16, v24, 7, v0.t ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsll.vv v16, v24, v16, v0.t @@ -483,10 +483,10 @@ define @fshl_v32i16( %a, @fshr_v16i32( %a, @fshl_v16i32( %a, @fshr_v7i64( %a, ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v24, a0, v0.t -; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t -; CHECK-NEXT: vnot.v v8, v24, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v24, v0.t +; CHECK-NEXT: vnot.v v24, v8, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vi v24, v24, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v24, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -859,26 +872,37 @@ define @fshl_v7i64( %a, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v24, a0, v0.t -; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t -; CHECK-NEXT: vnot.v v16, v24, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: vsll.vv v16, v16, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vnot.v v24, v8, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: 
add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v24, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v24, v24, 1, v0.t -; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -901,14 +925,15 @@ define @fshr_v8i64( %a, ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v24, a0, v0.t -; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t -; CHECK-NEXT: vnot.v v8, v24, v0.t -; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v24, v0.t +; CHECK-NEXT: vnot.v v24, v8, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vi v24, v24, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t +; CHECK-NEXT: vsll.vv v8, v8, v24, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -926,399 +951,52 @@ define @fshl_v8i64( %a, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v16, v8 -; CHECK-NEXT: li a0, 63 -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; CHECK-NEXT: vand.vx v8, v24, a0, v0.t -; CHECK-NEXT: vsll.vv v8, v16, v8, v0.t -; CHECK-NEXT: vnot.v v16, v24, v0.t -; CHECK-NEXT: vand.vx v16, v16, a0, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v24, v24, 1, v0.t -; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret - %res = call @llvm.vp.fshl.nxv8i64( %a, %b, %c, %m, i32 %evl) - ret %res -} - -declare @llvm.vp.fshr.nxv16i64(, , , , i32) -define @fshr_v16i64( %a, %b, %c, %m, i32 zeroext %evl) { -; CHECK-LABEL: fshr_v16i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a1, a3, 3 -; CHECK-NEXT: add a5, a0, a1 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: srli a5, a3, 3 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: add a5, a2, a1 -; CHECK-NEXT: sub a1, a4, a3 -; CHECK-NEXT: sltu a6, a4, a1 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, a1 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a7, 40 -; CHECK-NEXT: mul a5, a5, a7 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a2) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t +; CHECK-NEXT: vmv8r.v v16, v8 +; CHECK-NEXT: li a0, 63 +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t +; CHECK-NEXT: vsll.vv v16, v16, v24, v0.t +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vnot.v v24, v8, v0.t +; CHECK-NEXT: vand.vx v24, v24, a0, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vand.vx v16, v8, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vsrl.vi v16, v8, 1, v0.t +; CHECK-NEXT: vsrl.vv v16, v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: 
add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, a3, .LBB46_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB46_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vor.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 -; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret + %res = call @llvm.vp.fshl.nxv8i64( %a, %b, %c, %m, i32 %evl) + ret %res +} + +declare @llvm.vp.fshr.nxv16i64(, , , , i32) +define @fshr_v16i64( %a, %b, %c, %m, i32 zeroext %evl) { %res = call @llvm.vp.fshr.nxv16i64( %a, %b, %c, %m, i32 %evl) ret %res } declare @llvm.vp.fshl.nxv16i64(, , , , i32) define @fshl_v16i64( %a, %b, %c, %m, i32 zeroext %evl) { -; CHECK-LABEL: fshl_v16i64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 56 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; 
CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a1, a3, 3 -; CHECK-NEXT: add a5, a0, a1 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: srli a5, a3, 3 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a5 -; CHECK-NEXT: add a5, a2, a1 -; CHECK-NEXT: sub a1, a4, a3 -; CHECK-NEXT: sltu a6, a4, a1 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a6, a6, a1 -; CHECK-NEXT: li a1, 63 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a7, 40 -; CHECK-NEXT: mul a5, a5, a7 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a0, a0, a5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v16, (a2) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v8, v8, v0.t -; CHECK-NEXT: vand.vx v16, v8, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: bltu a4, 
a3, .LBB47_2 -; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a3 -; CHECK-NEXT: .LBB47_2: -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vand.vx v8, v8, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 48 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vnot.v v16, v8, v0.t -; CHECK-NEXT: vand.vx v16, v16, a1, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t -; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vor.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 56 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret %res = call @llvm.vp.fshl.nxv16i64( %a, %b, %c, %m, i32 %evl) ret %res } diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll index 3665669d83a3d..71628f029c521 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-constrained-sdnode.ll @@ -8,9 +8,9 @@ define @trunc_nxv1f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -28,9 +28,9 @@ define @trunc_nxv2f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -48,9 +48,9 @@ define 
@trunc_nxv4f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -68,9 +68,9 @@ define @trunc_nxv8f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -88,9 +88,9 @@ define @trunc_nxv16f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -108,9 +108,9 @@ define @trunc_nxv32f16( %x) strictfp { ; CHECK-LABEL: trunc_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 @@ -228,9 +228,9 @@ define @trunc_nxv1f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv1f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 @@ -248,9 +248,9 @@ define @trunc_nxv2f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 @@ -268,9 +268,9 @@ define @trunc_nxv4f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 @@ -288,9 +288,9 @@ define @trunc_nxv8f64( %x) strictfp { ; CHECK-LABEL: trunc_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 diff --git a/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll b/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll index 292f1deb2cce8..9475989d46343 100644 --- a/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll +++ b/llvm/test/CodeGen/RISCV/rvv/implicit-def-copy.ll @@ -11,8 +11,8 @@ define @vpload_nxv8i64(ptr %ptr, %m, 
i32 ze ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK $noreg, [[COPY2]], $v0, [[COPY]], 6 /* e64 */, 1 /* ta, mu */ :: (load unknown-size from %ir.ptr, align 64) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vmv0 = COPY [[COPY1]] + ; CHECK-NEXT: [[PseudoVLE64_V_M8_MASK:%[0-9]+]]:vrm8nov0 = PseudoVLE64_V_M8_MASK $noreg, [[COPY2]], [[COPY3]], [[COPY]], 6 /* e64 */, 1 /* ta, mu */ :: (load unknown-size from %ir.ptr, align 64) ; CHECK-NEXT: $v8m8 = COPY [[PseudoVLE64_V_M8_MASK]] ; CHECK-NEXT: PseudoRET implicit $v8m8 %load = call @llvm.vp.load.nxv8i64.p0(ptr %ptr, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll index b15896580d425..2a6f3c257b8a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll @@ -416,17 +416,17 @@ define @insert_nxv4i1_nxv1i1_0( %v, @llvm.vector.insert.nxv1i1.nxv4i1( %v, %sv, i64 0) ret %vec @@ -437,19 +437,19 @@ define @insert_nxv4i1_nxv1i1_2( %v, @llvm.vector.insert.nxv1i1.nxv4i1( %v, %sv, i64 2) ret %vec diff --git a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll index 6d8763d34ec1b..7e8553c480414 100644 --- a/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/llrint-vp.ll @@ -55,10 +55,10 @@ declare @llvm.vp.llrint.nxv8i64.nxv8f32(, define @llrint_nxv16i64_nxv16f32( %x, %m, i32 zeroext %evl) { ; CHECK-LABEL: llrint_nxv16i64_nxv16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll index 9fa8807ed4add..a4f19b36a6c08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/lrint-vp.ll @@ -117,10 +117,10 @@ define @lrint_nxv16f32( %x, @mgather_baseidx_nxv32i8(ptr %base, ; ; RV64-LABEL: mgather_baseidx_nxv32i8: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v16, v0 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a2, a1, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 +; RV64-NEXT: vslidedown.vx v8, v0, a2 +; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v9 +; RV64-NEXT: vsext.vf8 v16, v9 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t +; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vluxei64.v v13, (a0), v16, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v8, v16, a1 +; RV64-NEXT: vslidedown.vx v9, v24, a1 ; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v8, a2 +; RV64-NEXT: vslidedown.vx v0, v9, a2 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v11 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu @@ -2132,7 +2133,7 @@ define 
@mgather_baseidx_nxv32i8(ptr %base, ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v10 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu -; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vluxei64.v v14, (a0), v16, v0.t ; RV64-NEXT: vmv4r.v v8, v12 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/mutate-prior-vsetvli-avl.ll b/llvm/test/CodeGen/RISCV/rvv/mutate-prior-vsetvli-avl.ll index da56e235ecd8e..af30f7d7e6b4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mutate-prior-vsetvli-avl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mutate-prior-vsetvli-avl.ll @@ -11,16 +11,16 @@ define dso_local void @test(ptr nocapture noundef %var_99) { ; CHECK-NEXT: addi a1, a1, %lo(.L__const.test.var_45) ; CHECK-NEXT: vsetivli zero, 2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a1) -; CHECK-NEXT: li a1, 1 -; CHECK-NEXT: vmul.vx v12, v8, a1 ; CHECK-NEXT: lui a1, %hi(.L__const.test.var_101) ; CHECK-NEXT: addi a1, a1, %lo(.L__const.test.var_101) -; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmv.x.s a1, v12 +; CHECK-NEXT: vle8.v v12, (a1) +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: vmul.vx v16, v8, a1 +; CHECK-NEXT: vmv.x.s a1, v16 ; CHECK-NEXT: csrwi vxrm, 0 +; CHECK-NEXT: vssra.vv v12, v12, v8 ; CHECK-NEXT: vmsleu.vx v0, v8, a1 -; CHECK-NEXT: vssra.vv v8, v16, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v12, v12, v0 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 21213e5c3c965..05d6bc6b9136e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -440,13 +440,6 @@ define @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) ret %v @@ -1011,18 +984,10 @@ declare @llvm.vp.nearbyint.nxv16f64( @vp_nearbyint_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb -; CHECK-NEXT: vmv8r.v v24, v16 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs1r.v v0, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -1031,38 +996,32 @@ define @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v16, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: 
.LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll index 8457f3d2c149c..c6662e092aa5a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pass-fast-math-flags-sdnode.ll @@ -14,8 +14,8 @@ define @foo( %x, @llvm.vp.fmul.nxv1f64( %x, %y, %m, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll index b6e0d1e2ff4ae..b1d2c37d83a32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr88576.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr88576.ll @@ -7,6 +7,7 @@ define i1 @foo( %x, i64 %y) { ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a2, a1, 4 ; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: vmv2r.v v0, v8 ; CHECK-NEXT: bltu a0, a2, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a2 @@ -20,20 +21,29 @@ define i1 @foo( %x, i64 %y) { ; CHECK-NEXT: addi s0, sp, 80 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: addi a2, sp, 64 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 64 ; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 -; CHECK-NEXT: vs8r.v v24, (a1) -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: addi a3, sp, 64 +; CHECK-NEXT: vs2r.v v0, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vs8r.v v16, (a1) +; CHECK-NEXT: addi a1, sp, 64 +; CHECK-NEXT: vl2r.v v0, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vs8r.v v8, (a2) ; CHECK-NEXT: lbu a0, 0(a0) ; CHECK-NEXT: addi sp, s0, -80 diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index a8788c6355671..9ba3da9efa480 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -398,13 +398,6 @@ define @vp_rint_nxv32f16( %va, @vp_rint_nxv32f16( %va, @llvm.vp.rint.nxv32f16( %va, %m, i32 %evl) ret %v @@ -908,18 +892,10 @@ declare 
@llvm.vp.rint.nxv16f64(, @vp_rint_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_rint_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -928,45 +904,28 @@ define @vp_rint_nxv16f64( %va, @llvm.vp.rint.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 8bd2f93683cce..b3fe6bb0c2843 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -440,13 +440,6 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @llvm.vp.round.nxv32f16( %va, %m, i32 %evl) ret %v @@ -1000,18 +984,10 @@ declare @llvm.vp.round.nxv16f64(, < define @vp_round_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -1020,49 +996,32 @@ define @vp_round_nxv16f64( %va, @llvm.vp.round.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 2d6f145070b45..4dba53dd27b15 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -440,13 +440,6 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) ret %v @@ -1000,18 +984,10 @@ declare @llvm.vp.roundeven.nxv16f64( @vp_roundeven_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -1020,49 +996,32 @@ define @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, 
ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index 3992ba8d12129..109149f14d286 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -440,13 +440,6 @@ define @vp_roundtozero_nxv32f16( %va, < ; ; ZVFHMIN-LABEL: vp_roundtozero_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v16, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -455,53 +448,44 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; 
ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v20, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v16 -; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmflt.vf v16, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v16 -; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t +; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 -; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t +; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; ZVFHMIN-NEXT: vmv8r.v v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.roundtozero.nxv32f16( %va, %m, i32 %evl) ret %v @@ -1000,18 +984,10 @@ declare @llvm.vp.roundtozero.nxv16f64( @vp_roundtozero_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -1020,49 +996,32 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: lui a3, %hi(.LCPI32_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI32_0)(a3) ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v8, v16, v0.t +; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v0, v8, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; 
CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB32_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t +; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 -; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir index 442419efb83ca..ec1e306797a8b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-to-vmv.mir @@ -15,7 +15,7 @@ body: | ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5 /* e32 */ %false:vr = COPY $v8 %true:vr = COPY $v9 %avl:gprnox0 = COPY $x1 @@ -60,7 +60,7 @@ body: | ; CHECK-NEXT: %avl:gprnox0 = COPY $x1 ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */ ; CHECK-NEXT: $v0 = COPY %mask - ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 /* e32 */ %false:vr = COPY $v8 %pt:vrnov0 = COPY $v8 %true:vr = COPY $v9 diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll index 31fd5bdbd31fd..9ddd4f80a97cd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll @@ -15,8 +15,8 @@ define void @vpmerge_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, splat (i1 -1), i32 %vl) @@ -34,8 +34,8 @@ define void @vpselect_vpload_store( %passthru, ptr %p, ) into %ir.p) ; CHECK-NEXT: PseudoRET %a = call @llvm.vp.load.nxv2i32.p0(ptr %p, 
splat (i1 -1), i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index 87bfbdae461e0..ee939d4a0661b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -2157,63 +2157,41 @@ define @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @fcmp_oeq_vv_nxv32f64( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vv_nxv128i8( %va, @icmp_eq_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -1146,7 +1146,7 @@ define @icmp_eq_vx_nxv128i8( %va, i8 %b, ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB97_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret @@ -1159,7 +1159,7 @@ define @icmp_eq_vx_nxv128i8( %va, i8 %b, define @icmp_eq_vx_swap_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -1175,7 +1175,7 @@ define @icmp_eq_vx_swap_nxv128i8( %va, i8 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB98_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret @@ -2181,63 +2181,41 @@ define @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vv_nxv32i32( %va, @icmp_eq_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: srli a2, a3, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: sub a4, a1, a3 @@ -2265,7 +2243,7 @@ define @icmp_eq_vx_nxv32i32( %va, i32 %b, ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB190_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma @@ -2280,10 +2258,10 @@ define @icmp_eq_vx_nxv32i32( %va, i32 %b, define @icmp_eq_vx_swap_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: srli a2, a3, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: sub a4, a1, a3 @@ -2297,7 +2275,7 @@ define @icmp_eq_vx_swap_nxv32i32( %va, i32 ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB191_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: 
vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 032d32109933f..7daa036918580 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -49,8 +49,8 @@ define <8 x i8> @v4i8_2(<4 x i8> %a, <4 x i8> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -174,8 +174,8 @@ define <8 x i16> @v4i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -234,10 +234,10 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI15_0) ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v20, (a0) -; CHECK-NEXT: vmv2r.v v16, v10 +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vmv2r.v v20, v10 ; CHECK-NEXT: vmv2r.v v12, v8 -; CHECK-NEXT: vrgather.vv v8, v12, v20 +; CHECK-NEXT: vrgather.vv v8, v12, v16 ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vi v12, v12, 15 ; CHECK-NEXT: lui a0, 16 @@ -245,7 +245,7 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgather.vv v8, v20, v12, v0.t ; CHECK-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -329,18 +329,18 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: v8i32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: vrsub.vi v18, v14, 15 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v18, v10, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v12, v14, 7 +; CHECK-NEXT: vrsub.vi v8, v10, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %v16i32 = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> ret <16 x i32> %v16i32 @@ -492,8 +492,8 @@ define <8 x half> @v4f16_2(<4 x half> %a, <4 x half> %b) { ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrsub.vi v8, v11, 3 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -638,18 +638,18 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: v8f32_2: ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 -; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, 
ma -; CHECK-NEXT: vid.v v14 -; CHECK-NEXT: vrsub.vi v18, v14, 15 +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vrsub.vi v18, v10, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v12, v14, 7 +; CHECK-NEXT: vrsub.vi v8, v10, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vrgatherei16.vv v8, v16, v12, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret %v16f32 = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> ret <16 x float> %v16f32 diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 9046c861c3367..0ac316a430e9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -4865,19 +4865,19 @@ declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_icmp: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a3, 1 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB102_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vx v0, v10, a1, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vx v0, v9, a1, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v9, (a0), v0.t +; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: bne a0, a3, .LBB102_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup @@ -4906,19 +4906,19 @@ declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1 define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: sink_splat_vp_fcmp: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: .LBB103_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmfeq.vf v0, v10, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v9, fa0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v9, (a0), v0.t +; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: bne a0, a2, .LBB103_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll index 39f99f417fca9..76493d9514a8d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -13,14 +13,13 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: srli a1, a0, 1 -; 
CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) ret <2 x i64> %tmp @@ -33,13 +32,12 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -52,13 +50,12 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -70,14 +67,13 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v10, v8, v9 -; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmerge.vxm v10, v10, a0, v0 +; CHECK-NEXT: vsll.vv v11, v8, v9 +; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vmsne.vv v0, v8, v9 +; CHECK-NEXT: vmerge.vvm v8, v11, v10, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp @@ -95,14 +91,13 @@ define @vec_nxv2i64( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: srli a1, a0, 1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v10, v12, v10 -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmv.v.x v12, a1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; 
CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv2i64( %x, %y) ret %tmp @@ -115,13 +110,12 @@ define @vec_nxv4i32( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v10, v12, v10 -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv4i32( %x, %y) ret %tmp @@ -134,13 +128,12 @@ define @vec_nxv8i16( %x, ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: lui a0, 8 ; CHECK-NEXT: addi a1, a0, -1 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v10, v12, v10 -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmv.v.x v12, a1 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv8i16( %x, %y) ret %tmp @@ -152,14 +145,13 @@ define @vec_nxv16i8( %x, ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vsll.vv v12, v8, v10 -; CHECK-NEXT: vsra.vv v10, v12, v10 -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 -; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 +; CHECK-NEXT: vmerge.vxm v12, v12, a0, v0 +; CHECK-NEXT: vsll.vv v14, v8, v10 +; CHECK-NEXT: vsra.vv v10, v14, v10 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmerge.vvm v8, v14, v12, v0 ; CHECK-NEXT: ret %tmp = call @llvm.sshl.sat.nxv16i8( %x, %y) ret %tmp diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll index a8934bb25571c..081afcfab8dae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload-vpstore-output.ll @@ -16,8 +16,8 @@ define @strided_vpload_nxv1i8_i8(ptr %ptr, i8 signext %stride, ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[PseudoVLSE8_V_MF8_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE8_V_MF8_MASK $noreg, [[COPY3]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */, 1 /* ta, mu */ :: (load unknown-size, align 1) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vmv0 = COPY [[COPY1]] + ; CHECK-NEXT: [[PseudoVLSE8_V_MF8_MASK:%[0-9]+]]:vrnov0 = PseudoVLSE8_V_MF8_MASK $noreg, [[COPY3]], [[COPY2]], [[COPY4]], [[COPY]], 3 /* e8 */, 1 /* ta, mu */ :: (load unknown-size, align 1) ; CHECK-NEXT: $v8 = COPY [[PseudoVLSE8_V_MF8_MASK]] ; CHECK-NEXT: PseudoRET implicit $v8 %load = call @llvm.experimental.vp.strided.load.nxv1i8.p0.i8(ptr %ptr, i8 %stride, %m, i32 %evl) @@ -36,8 +36,8 @@ define void @strided_vpstore_nxv1i8_i8( %val, ptr %ptr, i8 sign ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10 ; 
CHECK-NEXT: [[COPY4:%[0-9]+]]:vr = COPY $v8 - ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: PseudoVSSE8_V_MF8_MASK [[COPY4]], [[COPY3]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */ :: (store unknown-size, align 1) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vmv0 = COPY [[COPY1]] + ; CHECK-NEXT: PseudoVSSE8_V_MF8_MASK [[COPY4]], [[COPY3]], [[COPY2]], [[COPY5]], [[COPY]], 3 /* e8 */ :: (store unknown-size, align 1) ; CHECK-NEXT: PseudoRET call void @llvm.experimental.vp.strided.store.nxv1i8.p0.i8( %val, ptr %ptr, i8 %stride, %m, i32 %evl) ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index 0e2105d5cba86..7e4d211df5ee1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -577,7 +577,6 @@ declare @llvm.experimental.vp.strided.load.nxv3f64.p0.i32( define @strided_load_nxv16f64(ptr %ptr, i64 %stride, %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_load_nxv16f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vmv1r.v v9, v0 ; CHECK-RV32-NEXT: csrr a4, vlenb ; CHECK-RV32-NEXT: sub a2, a3, a4 ; CHECK-RV32-NEXT: sltu a5, a3, a2 @@ -591,7 +590,8 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, %v, ptr %ptr, i32 sig ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a4, vlenb ; CHECK-NEXT: slli a6, a4, 1 -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: mv a5, a3 ; CHECK-NEXT: bltu a3, a6, .LBB43_2 ; CHECK-NEXT: # %bb.1: @@ -558,17 +557,8 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a4 ; CHECK-NEXT: .LBB43_4: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 3 -; CHECK-NEXT: sub sp, sp, t0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t ; CHECK-NEXT: sub a0, a5, a4 ; CHECK-NEXT: sltu t0, a5, a0 @@ -578,7 +568,8 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: add a7, a1, a7 ; CHECK-NEXT: srli t0, a4, 3 ; CHECK-NEXT: vsetvli t1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, t0 +; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vslidedown.vx v0, v0, t0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: sub a0, a3, a6 ; CHECK-NEXT: sltu a3, a3, a0 @@ -593,15 +584,9 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: add a1, a1, a3 ; CHECK-NEXT: srli a4, a4, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, a4 +; CHECK-NEXT: vslidedown.vx v0, v8, a4 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsse64.v v8, (a1), a2, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: 
addi sp, sp, 16 +; CHECK-NEXT: vsse64.v v24, (a1), a2, v0.t ; CHECK-NEXT: ret call void @llvm.experimental.vp.strided.store.nxv17f64.p0.i32( %v, ptr %ptr, i32 %stride, %mask, i32 %evl) ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll index 68e0c0089d0c7..a5dd27149c1f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll @@ -7,10 +7,10 @@ define @umulo_nxv1i8( %x, % ; CHECK-LABEL: umulo_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv1i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -25,10 +25,10 @@ define @umulo_nxv2i8( %x, % ; CHECK-LABEL: umulo_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv2i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -43,10 +43,10 @@ define @umulo_nxv4i8( %x, % ; CHECK-LABEL: umulo_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv4i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -61,10 +61,10 @@ define @umulo_nxv8i8( %x, % ; CHECK-LABEL: umulo_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv8i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -79,10 +79,10 @@ define @umulo_nxv16i8( %x, , } @llvm.umul.with.overflow.nxv16i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -97,10 +97,10 @@ define @umulo_nxv32i8( %x, , } @llvm.umul.with.overflow.nxv32i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -115,10 +115,10 @@ define @umulo_nxv64i8( %x, , } @llvm.umul.with.overflow.nxv64i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -133,10 +133,10 @@ define @umulo_nxv1i16( %x, , } @llvm.umul.with.overflow.nxv1i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -151,10 +151,10 @@ define @umulo_nxv2i16( %x, , } @llvm.umul.with.overflow.nxv2i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -169,10 +169,10 @@ define @umulo_nxv4i16( %x, , } @llvm.umul.with.overflow.nxv4i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -187,10 +187,10 @@ define @umulo_nxv8i16( %x, , } @llvm.umul.with.overflow.nxv8i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -205,10 +205,10 @@ define @umulo_nxv16i16( %x, , } 
@llvm.umul.with.overflow.nxv16i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -223,10 +223,10 @@ define @umulo_nxv32i16( %x, , } @llvm.umul.with.overflow.nxv32i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -241,10 +241,10 @@ define @umulo_nxv1i32( %x, , } @llvm.umul.with.overflow.nxv1i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -259,10 +259,10 @@ define @umulo_nxv2i32( %x, , } @llvm.umul.with.overflow.nxv2i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -277,10 +277,10 @@ define @umulo_nxv4i32( %x, , } @llvm.umul.with.overflow.nxv4i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -295,10 +295,10 @@ define @umulo_nxv8i32( %x, , } @llvm.umul.with.overflow.nxv8i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -313,10 +313,10 @@ define @umulo_nxv16i32( %x, , } @llvm.umul.with.overflow.nxv16i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -331,10 +331,10 @@ define @umulo_nxv1i64( %x, , } @llvm.umul.with.overflow.nxv1i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -349,10 +349,10 @@ define @umulo_nxv2i64( %x, , } @llvm.umul.with.overflow.nxv2i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -367,10 +367,10 @@ define @umulo_nxv4i64( %x, , } @llvm.umul.with.overflow.nxv4i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -385,10 +385,10 @@ define @umulo_nxv8i64( %x, , } @llvm.umul.with.overflow.nxv8i64( %x, %y) %b = extractvalue { , } %a, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 4b5e737d22eb8..cf733226fe8cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -565,7 +565,7 @@ declare @llvm.vp.add.nxv128i8(, @vadd_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb @@ -581,7 +581,7 @@ define @vadd_vi_nxv128i8( %va, @llvm.vp.add.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1343,10 +1343,10 @@ declare @llvm.vp.add.nxv32i32(, @vadd_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -1360,7 +1360,7 @@ define @vadd_vi_nxv32i32( %va, @llvm.vp.add.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) @@ -1399,10 +1399,10 @@ declare i32 @llvm.vscale.i32() define @vadd_vi_nxv32i32_evl_nx8( %va, %m) { ; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a1, a0, 2 ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: slli a1, a0, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -1416,7 +1416,7 @@ define @vadd_vi_nxv32i32_evl_nx8( %va, @vfsgnj_vv_nxv32f16( %va, @vfsgnj_vv_nxv32f16( %va, , } @vector_deinterleave_load_nxv16i ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v8, a0 +; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v12, 
v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 1347dfb6ff2ae..889e7d11c23ff 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -91,26 +91,25 @@ declare {, } @llvm.experimental.vector.deint define {, } @vector_deinterleave_nxv64i1_nxv128i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v8 +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vnsrl.wi v12, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 8 -; CHECK-NEXT: vnsrl.wi v28, v8, 8 +; CHECK-NEXT: vnsrl.wi v0, v16, 8 +; CHECK-NEXT: vnsrl.wi v4, v24, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v16, v24, 0 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v9, v16 +; CHECK-NEXT: vmsne.vi v0, v0, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i1( %vec) ret {, } %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index b55cdb3afedb9..2a0f0d52f0bb9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -9,16 +9,16 @@ define void @vector_interleave_store_nxv32i1_nxv16i1( %a, %a, @vector_interleave_nxv32i1_nxv16i1( ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v10, v12, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vwaddu.vv v16, v8, v12 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 +; CHECK-NEXT: vwaddu.vv v12, v8, v10 ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vwmaccu.vx v16, a0, v12 -; CHECK-NEXT: vmsne.vi v2, v18, 0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 +; CHECK-NEXT: vwmaccu.vx v12, a0, v10 +; CHECK-NEXT: vmsne.vi v2, v14, 0 +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 @@ -30,14 +30,14 @@ define @vector_interleave_nxv32i1_nxv16i1( ; ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1: ; ZVBB: # %bb.0: -; ZVBB-NEXT: vmv1r.v v9, v0 +; ZVBB-NEXT: vmv1r.v v16, v0 ; ZVBB-NEXT: vsetvli a0, zero, e8, m2, ta, mu ; ZVBB-NEXT: vmv.v.i v10, 0 ; ZVBB-NEXT: vmv1r.v v0, v8 -; ZVBB-NEXT: vmerge.vim v10, v10, 1, v0 -; ZVBB-NEXT: vwsll.vi v12, v10, 8 +; ZVBB-NEXT: vmerge.vim v8, v10, 1, v0 +; ZVBB-NEXT: vwsll.vi v12, v8, 8 ; ZVBB-NEXT: li a0, 1 -; ZVBB-NEXT: vmv1r.v 
v0, v9 +; ZVBB-NEXT: vmv1r.v v0, v16 ; ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t ; ZVBB-NEXT: vmsne.vi v2, v14, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 @@ -122,9 +122,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vand.vi v13, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v13, 0 ; CHECK-NEXT: vsrl.vi v16, v12, 1 +; CHECK-NEXT: vand.vi v12, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -137,9 +137,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; ZVBB-NEXT: vid.v v12 -; ZVBB-NEXT: vand.vi v13, v12, 1 -; ZVBB-NEXT: vmsne.vi v0, v13, 0 ; ZVBB-NEXT: vsrl.vi v16, v12, 1 +; ZVBB-NEXT: vand.vi v12, v12, 1 +; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t ; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16 @@ -158,41 +158,42 @@ declare @llvm.experimental.vector.interleave2.nxv4i64( @vector_interleave_nxv128i1_nxv64i1( %a, %b) { ; CHECK-LABEL: vector_interleave_nxv128i1_nxv64i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vwaddu.vv v24, v8, v16 +; CHECK-NEXT: vwaddu.vv v24, v16, v8 ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vwmaccu.vx v24, a0, v16 +; CHECK-NEXT: vwmaccu.vx v24, a0, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vwaddu.vv v24, v12, v20 -; CHECK-NEXT: vwmaccu.vx v24, a0, v20 +; CHECK-NEXT: vwaddu.vv v24, v20, v12 +; CHECK-NEXT: vwmaccu.vx v24, a0, v12 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmsne.vi v8, v24, 0 ; CHECK-NEXT: ret ; ; ZVBB-LABEL: vector_interleave_nxv128i1_nxv64i1: ; ZVBB: # %bb.0: +; ZVBB-NEXT: vmv1r.v v24, v8 ; ZVBB-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; ZVBB-NEXT: vmv.v.i v24, 0 -; ZVBB-NEXT: vmerge.vim v16, v24, 1, v0 -; ZVBB-NEXT: vmv1r.v v0, v8 -; ZVBB-NEXT: vmerge.vim v8, v24, 1, v0 +; ZVBB-NEXT: vmv.v.i v16, 0 +; ZVBB-NEXT: vmerge.vim v8, v16, 1, v0 +; ZVBB-NEXT: vmv1r.v v0, v24 +; ZVBB-NEXT: vmerge.vim v16, v16, 1, v0 ; ZVBB-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; ZVBB-NEXT: vwsll.vi v24, v8, 8 -; ZVBB-NEXT: vwaddu.wv v24, v24, v16 +; ZVBB-NEXT: vwsll.vi v24, v16, 8 +; ZVBB-NEXT: vwaddu.wv v24, v24, v8 ; ZVBB-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; ZVBB-NEXT: vmsne.vi v0, v24, 0 ; ZVBB-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; ZVBB-NEXT: vwsll.vi v24, v12, 8 -; ZVBB-NEXT: vwaddu.wv v24, v24, v20 +; ZVBB-NEXT: vwsll.vi v24, v20, 8 +; ZVBB-NEXT: vwaddu.wv v24, v24, v12 ; ZVBB-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; ZVBB-NEXT: vmsne.vi v8, v24, 0 ; ZVBB-NEXT: ret @@ -296,9 +297,9 @@ define @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv16f64_nxv8f64( @vector_interleave_nxv16f64_nxv8f64( 
@llvm.experimental.vector.splice.nxv1i1( @splice_nxv1i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv1i1_offset_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv1i1( %a, %b, i32 -1) @@ -31,21 +31,21 @@ define @splice_nxv1i1_offset_negone( %a, @splice_nxv1i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv1i1_offset_max: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 1 +; CHECK-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv1i1( %a, %b, i32 1) @@ -57,19 +57,19 @@ declare @llvm.experimental.vector.splice.nxv2i1( @splice_nxv2i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_offset_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 -1) @@ -79,21 +79,21 @@ define @splice_nxv2i1_offset_negone( %a, @splice_nxv2i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1_offset_max: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -3 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma 
-; CHECK-NEXT: vslidedown.vi v9, v9, 3 +; CHECK-NEXT: vslidedown.vi v8, v8, 3 ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv2i1( %a, %b, i32 3) @@ -105,19 +105,19 @@ declare @llvm.experimental.vector.splice.nxv4i1( @splice_nxv4i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1_offset_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 -1) @@ -127,21 +127,21 @@ define @splice_nxv4i1_offset_negone( %a, @splice_nxv4i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1_offset_max: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -7 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 7 +; CHECK-NEXT: vslidedown.vi v8, v8, 7 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv4i1( %a, %b, i32 7) @@ -153,18 +153,18 @@ declare @llvm.experimental.vector.splice.nxv8i1( @splice_nxv8i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1_offset_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vslidedown.vx v9, v9, a0 -; CHECK-NEXT: vslideup.vi v9, v8, 1 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 -1) @@ -174,20 +174,20 @@ define @splice_nxv8i1_offset_negone( %a, @splice_nxv8i1_offset_max( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1_offset_max: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli a0, zero, 
e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -15 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v9, v9, 15 +; CHECK-NEXT: vslidedown.vi v8, v8, 15 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a0 -; CHECK-NEXT: vand.vi v8, v9, 1 +; CHECK-NEXT: vslideup.vx v8, v9, a0 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv8i1( %a, %b, i32 15) @@ -201,18 +201,18 @@ define @splice_nxv16i1_offset_negone( %a, < ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v10, v12, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m2, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -225,18 +225,18 @@ define @splice_nxv16i1_offset_max( %a, @splice_nxv32i1_offset_negone( %a, < ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v16, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m4, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 +; CHECK-NEXT: vslideup.vi v8, v12, 1 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret @@ -274,19 +274,19 @@ define @splice_nxv32i1_offset_max( %a, @llvm.experimental.vector.splice.nxv32i1( %a, %b, i32 63) @@ -298,21 +298,21 @@ declare @llvm.experimental.vector.splice.nxv64i1( @splice_nxv64i1_offset_negone( %a, %b) #0 { ; CHECK-LABEL: splice_nxv64i1_offset_negone: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslidedown.vx v16, v16, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 1 -; CHECK-NEXT: vand.vi v8, v8, 1 +; CHECK-NEXT: vslideup.vi v16, v8, 1 +; CHECK-NEXT: vand.vi v8, 
v16, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.splice.nxv64i1( %a, %b, i32 -1) @@ -323,19 +323,19 @@ define @splice_nxv64i1_offset_max( %a, @llvm.experimental.vector.splice.nxv64i1( %a, %b, i32 127) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll index df2bc523cd7a8..35488d1e0d0b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll @@ -229,7 +229,6 @@ define @vfabs_vv_nxv32f16( %va, @vfabs_vv_nxv32f16( %va, @llvm.vp.fabs.nxv16f64(, @vfabs_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -542,7 +542,7 @@ define @vfabs_vv_nxv16f64( %va, @llvm.vp.fabs.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll index c69a7bc5cece4..10d34457c3da4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll @@ -569,7 +569,6 @@ define @vfadd_vv_nxv32f16( %va, @vfadd_vv_nxv32f16( %va, @vfadd_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfadd_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -689,22 +688,23 @@ define @vfadd_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfadd.vv v24, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB24_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB24_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vfadd.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfadd.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll index 3ad17e85570a2..d5cc567e1b718 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll @@ -515,7 +515,6 @@ define @vfdiv_vv_nxv32f16( %va, @vfdiv_vv_nxv32f16( %va, @vfdiv_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfdiv_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; 
ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -635,22 +634,23 @@ define @vfdiv_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfdiv.vv v24, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vfdiv.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfdiv.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index a41c262116136..eadb28f2f653c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -1108,11 +1108,10 @@ define @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfmax_vv_nxv32f16( %va, @vfmax_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmin_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vv_nxv32f16( %va, @vfmul_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfmul_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -635,22 +634,23 @@ define @vfmul_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmul.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfmul.vv v24, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vfmul.vv v16, 
v16, v24, v0.t +; ZVFHMIN-NEXT: vfmul.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index 292f27794f378..84c441799fc5d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1108,11 +1108,10 @@ define @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfneg_vv_nxv32f16( %va, @vfneg_vv_nxv32f16( %va, @llvm.vp.fneg.nxv16f64(, @vfneg_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -542,7 +542,7 @@ define @vfneg_vv_nxv16f64( %va, @llvm.vp.fneg.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll index 5cfa98916a2de..5127933811d9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll @@ -96,10 +96,10 @@ declare @llvm.vp.fpext.nxv32f32.nxv32f16( @vfpext_nxv32f16_nxv32f32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_nxv32f16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll index 15c4bf255e6dc..1fc23099d80a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -394,10 +394,10 @@ declare @llvm.vp.fptosi.nxv32i16.nxv32f32( @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -411,7 +411,7 @@ define @vfptosi_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret @@ -424,10 +424,10 @@ declare @llvm.vp.fptosi.nxv32i32.nxv32f32( @vfptosi_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -441,7 +441,7 @@ define @vfptosi_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call 
@llvm.vp.fptosi.nxv32i32.nxv32f32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll index a2591e7dc35f0..af7b4d45feb48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -394,10 +394,10 @@ declare @llvm.vp.fptoui.nxv32i16.nxv32f32( @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -411,7 +411,7 @@ define @vfptoui_nxv32i16_nxv32f32( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret @@ -424,10 +424,10 @@ declare @llvm.vp.fptoui.nxv32i32.nxv32f32( @vfptoui_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -441,7 +441,7 @@ define @vfptoui_nxv32i32_nxv32f32( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i32.nxv32f32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index dd122f1f25110..685653c856b3e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -96,39 +96,25 @@ declare @llvm.vp.fptrunc.nxv16f64.nxv16f32( @vfptrunc_nxv16f32_nxv16f64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_nxv16f32_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t +; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB7_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; 
CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t +; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: ret %v = call @llvm.vp.fptrunc.nxv16f64.nxv16f32( %a, %m, i32 %vl) ret %v @@ -145,7 +131,6 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb @@ -155,48 +140,50 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: srli a4, a1, 2 ; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v16, v0, a4 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v8, v0, a4 ; CHECK-NEXT: slli a4, a1, 3 ; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: vl8re64.v v8, (a4) -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: sub a5, a2, a4 -; CHECK-NEXT: sltu a6, a2, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: sub a6, a5, a1 -; CHECK-NEXT: sltu a7, a5, a6 -; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a6, a7, a6 -; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma -; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v20, v8, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB8_2 +; CHECK-NEXT: vl8re64.v v16, (a4) +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v8 +; CHECK-NEXT: vslidedown.vx v0, v8, a3 +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: sub a4, a2, a3 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 +; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: sltu a0, a4, a5 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a5 +; CHECK-NEXT: bltu a4, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB8_4 +; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vfncvt.f.f.w v12, v16, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB8_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 @@ -207,9 +194,9 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi 
a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfncvt.f.f.w v8, v16, v0.t +; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index d6caad15e40a2..36dadb5f1323e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -229,7 +229,6 @@ define @vfsqrt_vv_nxv32f16( %va, @vfsqrt_vv_nxv32f16( %va, @llvm.vp.sqrt.nxv16f64(, @vfsqrt_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -542,7 +542,7 @@ define @vfsqrt_vv_nxv16f64( %va, @llvm.vp.sqrt.nxv16f64( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll index 2eae18d7cc493..c8b894cbdbb16 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll @@ -515,7 +515,6 @@ define @vfsub_vv_nxv32f16( %va, @vfsub_vv_nxv32f16( %va, @vfsub_vf_nxv32f16( %va, half %b ; ; ZVFHMIN-LABEL: vfsub_vf_nxv32f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v24 +; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -635,22 +634,23 @@ define @vfsub_vf_nxv32f16( %va, half %b ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v3, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfsub.vv v24, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: bltu a0, a1, .LBB22_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB22_2: -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vfsub.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vfsub.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll index ca0bbfd65ca29..ceceff23c868f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll @@ -710,17 +710,31 @@ define @vfnmacc_vf_nxv16f32_commute( % ; ; ZVFHMIN-LABEL: vfnmacc_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: 
vmv4r.v v24, v8 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v8, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfnmadd.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfnmadd.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll index 2797ca2eb3163..bfef07b1b44da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-vp.ll @@ -682,17 +682,31 @@ define @vfnmsac_vf_nxv16f32_commute( % ; ; ZVFHMIN-LABEL: vfnmsac_vf_nxv16f32_commute: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv4r.v v24, v8 +; ZVFHMIN-NEXT: addi sp, sp, -16 +; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 +; ZVFHMIN-NEXT: csrr a1, vlenb +; ZVFHMIN-NEXT: slli a1, a1, 3 +; ZVFHMIN-NEXT: sub sp, sp, a1 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFHMIN-NEXT: addi a1, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v8, fa5 +; ZVFHMIN-NEXT: vfmv.v.f v24, fa5 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v8 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v4, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8, v0.t +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vfnmsub.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfnmsub.vv v24, v16, v8, v0.t +; ZVFHMIN-NEXT: vmv.v.v v8, v24 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 3 +; ZVFHMIN-NEXT: add sp, sp, a0 +; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %elt.head = insertelement poison, half %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll index 15cb42bacf173..a8cf307e6fadd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll @@ -49,8 +49,8 @@ define i64 @test_vleff_nxv8i8_mask( %maskedoff, ptr %p, %val, ptr %base, @llvm.vp.smax.nxv128i8(, @vmax_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -424,7 +424,7 @@ define @vmax_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -970,10 +970,10 @@ declare @llvm.vp.smax.nxv32i32(, @vmax_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -987,7 +987,7 @@ define @vmax_vx_nxv32i32( %va, i32 %b, poison, i32 %b, i32 0 @@ -1030,10 +1030,10 @@ declare i32 @llvm.vscale.i32() define @vmax_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { ; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -1047,7 +1047,7 @@ define @vmax_vx_nxv32i32_evl_nx8( %va, i3 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 1f620a44dbbc8..c9ebae3d46db0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -410,7 +410,7 @@ declare @llvm.vp.umax.nxv128i8(, @vmaxu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -426,7 +426,7 @@ define @vmaxu_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -972,10 +972,10 @@ declare @llvm.vp.umax.nxv32i32(, @vmaxu_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -989,7 +989,7 @@ define @vmaxu_vx_nxv32i32( %va, i32 %b, < ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1032,10 +1032,10 @@ declare i32 @llvm.vscale.i32() define @vmaxu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { ; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, 
a2 ; CHECK-NEXT: slli a2, a1, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -1049,7 +1049,7 @@ define @vmaxu_vx_nxv32i32_evl_nx8( %va, i ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll index db568dd0f8d84..dd1547fa72807 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfeq.mask.nxv1f16( define @intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfeq.mask.nxv2f16( define @intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfeq.mask.nxv4f16( define @intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfeq.mask.nxv8f16( define @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfeq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfeq.mask.nxv16f16( define @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv16f16( @@ -294,10 +289,9 @@ declare 
@llvm.riscv.vmfeq.mask.nxv1f32( define @intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfeq.mask.nxv2f32( define @intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfeq.mask.nxv4f32( define @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfeq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfeq.mask.nxv8f32( define @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfeq.mask.nxv1f64( define @intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfeq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfeq.vv v0, v8, v9 ; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfeq.mask.nxv2f64( define @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfeq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfeq.mask.nxv4f64( define 
@intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfeq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfeq.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index 73e47dae0e574..c78e8d70b633e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f16( define @intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfge.mask.nxv2f16( define @intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfge.mask.nxv4f16( define @intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfge.mask.nxv8f16( define @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfge.mask.nxv16f16( define @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfle.vv v8, v16, 
v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f32( define @intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfge.mask.nxv2f32( define @intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfge.mask.nxv4f32( define @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfge.mask.nxv8f32( define @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfge.mask.nxv1f64( define @intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v9, v8 ; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfge.mask.nxv2f64( define @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; 
CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfge.mask.nxv4f64( define @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfge.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index 802981f44dac1..b5299faf30b49 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f16( define @intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfgt.mask.nxv2f16( define @intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfgt.mask.nxv4f16( define @intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfgt.mask.nxv8f16( define @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmflt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfgt.mask.nxv16f16( define @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; 
CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f32( define @intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfgt.mask.nxv2f32( define @intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfgt.mask.nxv4f32( define @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmflt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfgt.mask.nxv8f32( define @intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfgt.mask.nxv1f64( define @intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v9, v8 ; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfgt.mask.nxv2f64( define @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; 
CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmflt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfgt.mask.nxv4f64( define @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmflt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfgt.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index aa04ca561a6f2..383b175181c57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f16( define @intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfle.mask.nxv2f16( define @intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfle.mask.nxv4f16( define @intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfle.mask.nxv8f16( define @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfle.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfle.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfle.mask.nxv16f16( define @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfle.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f32( define @intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfle.mask.nxv2f32( define @intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfle.mask.nxv4f32( define @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfle.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfle.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfle.mask.nxv8f32( define @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfle.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfle.mask.nxv1f64( define @intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfle.vv v0, v8, v9 ; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfle.mask.nxv2f64( define @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: 
vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfle.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfle.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfle.mask.nxv4f64( define @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfle.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfle.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 11ea0309c07bd..7d0abe3701276 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f16( define @intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmflt.mask.nxv2f16( define @intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmflt.mask.nxv4f16( define @intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmflt.mask.nxv8f16( define @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmflt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmflt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmflt.mask.nxv16f16( define 
@intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmflt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f32( define @intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmflt.mask.nxv2f32( define @intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmflt.mask.nxv4f32( define @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmflt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmflt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmflt.mask.nxv8f32( define @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmflt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmflt.mask.nxv1f64( define @intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmflt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmflt.vv v0, v8, v9 ; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmflt.mask.nxv2f64( define 
@intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmflt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmflt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmflt.mask.nxv4f64( define @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmflt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmflt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmflt.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll index d0b64fa7d4f9f..db077b1b5513b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f16( define @intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f16_nxv1f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmfne.mask.nxv2f16( define @intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f16_nxv2f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmfne.mask.nxv4f16( define @intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f16_nxv4f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -190,12 +187,11 @@ declare @llvm.riscv.vmfne.mask.nxv8f16( define @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmfne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfne.vv v4, v10, v12, 
v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f16( @@ -242,12 +238,11 @@ declare @llvm.riscv.vmfne.mask.nxv16f16( define @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv16f16( @@ -294,10 +289,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f32( define @intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f32_nxv1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -346,10 +340,9 @@ declare @llvm.riscv.vmfne.mask.nxv2f32( define @intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f32_nxv2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -398,12 +391,11 @@ declare @llvm.riscv.vmfne.mask.nxv4f32( define @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmfne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfne.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f32( @@ -450,12 +442,11 @@ declare @llvm.riscv.vmfne.mask.nxv8f32( define @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmfne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv8f32( @@ -502,10 +493,9 @@ declare @llvm.riscv.vmfne.mask.nxv1f64( define @intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1f64_nxv1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmfne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmfne.vv v0, v8, v9 ; CHECK-NEXT: vmfne.vv v11, 
v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -554,12 +544,11 @@ declare @llvm.riscv.vmfne.mask.nxv2f64( define @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmfne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmfne.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv2f64( @@ -606,12 +595,11 @@ declare @llvm.riscv.vmfne.mask.nxv4f64( define @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmfne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmfne.nxv4f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 8fabf93356aeb..34ee1cc4a0123 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -408,7 +408,7 @@ declare @llvm.vp.smin.nxv128i8(, @vmin_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -424,7 +424,7 @@ define @vmin_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -970,10 +970,10 @@ declare @llvm.vp.smin.nxv32i32(, @vmin_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -987,7 +987,7 @@ define @vmin_vx_nxv32i32( %va, i32 %b, poison, i32 %b, i32 0 @@ -1030,10 +1030,10 @@ declare i32 @llvm.vscale.i32() define @vmin_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { ; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -1047,7 +1047,7 @@ define @vmin_vx_nxv32i32_evl_nx8( %va, i3 ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 8ec85e545a0f8..0e69992ad8791 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -410,7 +410,7 @@ declare @llvm.vp.umin.nxv128i8(, @vminu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vminu_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -426,7 +426,7 @@ define @vminu_vx_nxv128i8( %va, i8 %b, poison, i8 %b, i32 0 @@ -972,10 +972,10 @@ declare @llvm.vp.umin.nxv32i32(, @vminu_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vminu_vx_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a3, a2, 2 ; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -989,7 +989,7 @@ define @vminu_vx_nxv32i32( %va, i32 %b, < ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB80_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 @@ -1032,10 +1032,10 @@ declare i32 @llvm.vscale.i32() define @vminu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { ; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a2, a1, 1 ; CHECK-NEXT: sub a3, a1, a2 @@ -1049,7 +1049,7 @@ define @vminu_vx_nxv32i32_evl_nx8( %va, i ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB82_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll index bc3fbcbb4cb43..f96ab33d181a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i8( define @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i8( define @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmseq.mask.nxv4i8( define @intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; 
CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmseq.mask.nxv8i8( define @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmseq.mask.nxv16i8( define @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmseq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmseq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmseq.mask.nxv32i8( define @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmseq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i16( define @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i16( define @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmseq.mask.nxv4i16( define @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare 
@llvm.riscv.vmseq.mask.nxv8i16( define @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmseq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmseq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmseq.mask.nxv16i16( define @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmseq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i32( define @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmseq.mask.nxv2i32( define @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmseq.mask.nxv4i32( define @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmseq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmseq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmseq.mask.nxv8i32( define @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmseq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmseq.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmseq.mask.nxv1i64( define @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmseq.vv v0, v8, v9 ; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmseq.mask.nxv2i64( define @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmseq.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmseq.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmseq.mask.nxv4i64( define @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmseq.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmseq.nxv4i64( @@ -1692,11 +1674,11 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmseq.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmseq.vv v11, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index 2ed626c7751e5..133ae98327992 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i8( define @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i8( define @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 
; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsge.mask.nxv4i8( define @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsge.mask.nxv8i8( define @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsge.mask.nxv16i8( define @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsge.mask.nxv32i8( define @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i16( define @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i16( define @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare 
@llvm.riscv.vmsge.mask.nxv4i16( define @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsge.mask.nxv8i16( define @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsge.mask.nxv16i16( define @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i32( define @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsge.mask.nxv2i32( define @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsge.mask.nxv4i32( define @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsge.mask.nxv8i32( define 
@intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsge.mask.nxv1i64( define @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsge.mask.nxv2i64( define @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsge.mask.nxv4i64( define @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsge.nxv4i64( @@ -1708,11 +1690,11 @@ define @intrinsic_vmsge_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsle.vv v10, v11, v8, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsle.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll index 0ce7dae12a1e4..c9abf436cb36a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i8( define @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; 
CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i8( define @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i8( define @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i8( define @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i8( define @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv32i8( define @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i16( define @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 
+391,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i16( define @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i16( define @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i16( define @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv16i16( define @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i32( define @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i32( define @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i32( define 
@intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv8i32( define @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgeu.mask.nxv1i64( define @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv2i64( define @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgeu.mask.nxv4i64( define @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgeu.nxv4i64( @@ -1708,11 +1690,11 @@ define @intrinsic_vmsgeu_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsleu.vv v10, v11, v8, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsleu.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll index aed9882de6266..7d544af8b8b08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i8( define @intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i8( define @intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgt.mask.nxv4i8( define @intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgt.mask.nxv8i8( define @intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgt.mask.nxv16i8( define @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgt.mask.nxv32i8( define @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; 
CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i16( define @intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgt.mask.nxv2i16( define @intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgt.mask.nxv4i16( define @intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgt.mask.nxv8i16( define @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgt.mask.nxv16i16( define @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i32( define @intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare 
@llvm.riscv.vmsgt.mask.nxv2i32( define @intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgt.mask.nxv4i32( define @intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgt.mask.nxv8i32( define @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgt.mask.nxv1i64( define @intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v9, v8 ; CHECK-NEXT: vmslt.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgt.mask.nxv2i64( define @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v10, v8 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgt.mask.nxv4i64( define @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgt.nxv4i64( @@ 
-1692,11 +1674,11 @@ define @intrinsic_vmsgt_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmslt.vv v10, v11, v8, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmslt.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index e798d39e69946..65a9ab51ec482 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i8( define @intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i8( define @intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i8( define @intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i8( define @intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i8( define @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmsgtu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv32i8( define @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i16( define @intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i16( define @intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i16( define @intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i16( define @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv16i16( define @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = 
call @llvm.riscv.vmsgtu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i32( define @intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i32( define @intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i32( define @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv8i32( define @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsgtu.mask.nxv1i64( define @intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v9, v8 ; CHECK-NEXT: vmsltu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv2i64( define @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v10, v8 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = 
call @llvm.riscv.vmsgtu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsgtu.mask.nxv4i64( define @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v12, v8 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsgtu.nxv4i64( @@ -1692,11 +1674,11 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsltu.vv v10, v11, v8, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsltu.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll index a7a754fe4a20d..93d385206554d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i8( define @intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i8( define @intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsle.mask.nxv4i8( define @intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsle.mask.nxv8i8( define @intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, 
v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsle.mask.nxv16i8( define @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsle.mask.nxv32i8( define @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i16( define @intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i16( define @intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsle.mask.nxv4i16( define @intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsle.mask.nxv8i16( define @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i16( @@ -554,12 +544,11 @@ 
declare @llvm.riscv.vmsle.mask.nxv16i16( define @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i32( define @intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsle.mask.nxv2i32( define @intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsle.mask.nxv4i32( define @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsle.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsle.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsle.mask.nxv8i32( define @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsle.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsle.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsle.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsle.mask.nxv1i64( define @intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsle.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v8, v9 ; CHECK-NEXT: vmsle.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsle.mask.nxv2i64( 
define @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v2, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vmsle.vv v2, v8, v10
-; CHECK-NEXT: vmv1r.v v4, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v10
+; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t
; CHECK-NEXT: vmv1r.v v0, v2
-; CHECK-NEXT: vmsle.vv v4, v10, v12, v0.t
-; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmsle.nxv2i64(
@@ -918,12 +901,11 @@ declare @llvm.riscv.vmsle.mask.nxv4i64(
define @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmv1r.v v4, v0
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vmsle.vv v4, v8, v12
-; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: vmsle.vv v0, v8, v12
+; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t
; CHECK-NEXT: vmv1r.v v0, v4
-; CHECK-NEXT: vmsle.vv v8, v12, v16, v0.t
-; CHECK-NEXT: vmv1r.v v0, v8
; CHECK-NEXT: ret
entry:
%mask = call @llvm.riscv.vmsle.nxv4i64(
@@ -1692,11 +1674,11 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64(
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
-; RV32-NEXT: vlse64.v v11, (a0), zero
-; RV32-NEXT: vmv1r.v v10, v0
+; RV32-NEXT: vlse64.v v10, (a0), zero
+; RV32-NEXT: vmv1r.v v11, v0
; RV32-NEXT: vmv1r.v v0, v9
-; RV32-NEXT: vmsle.vv v10, v8, v11, v0.t
-; RV32-NEXT: vmv.v.v v0, v10
+; RV32-NEXT: vmsle.vv v11, v8, v10, v0.t
+; RV32-NEXT: vmv.v.v v0, v11
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
index c382d47ea83b0..834536595205b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll
@@ -34,10 +34,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i8(
define @intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i8_nxv1i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -86,10 +85,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i8(
define @intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i8_nxv2i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret
@@ -138,10 +136,9 @@ declare @llvm.riscv.vmsleu.mask.nxv4i8(
define @intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i8_nxv4i8:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT: vmsleu.vv v8, v8, v9
; CHECK-NEXT: vmv1r.v v11, v0
-; CHECK-NEXT: vmv1r.v v0, v8
+; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT: vmsleu.vv v0, v8, v9
; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t
; CHECK-NEXT: vmv1r.v v0, v11
; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsleu.mask.nxv8i8( define @intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsleu.mask.nxv16i8( define @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsleu.mask.nxv32i8( define @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i16( define @intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i16( define @intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsleu.mask.nxv4i16( define @intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsleu.mask.nxv8i16( 
define @intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsleu.mask.nxv16i16( define @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i32( define @intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsleu.mask.nxv2i32( define @intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsleu.mask.nxv4i32( define @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsleu.mask.nxv8i32( define @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmsleu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsleu.mask.nxv1i64( define @intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v8, v9 ; CHECK-NEXT: vmsleu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsleu.mask.nxv2i64( define @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v10 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsleu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsleu.mask.nxv4i64( define @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsleu.vv v0, v8, v12 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsleu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsleu.nxv4i64( @@ -1692,11 +1674,11 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsleu.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsleu.vv v11, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 9ba5a4045cfa4..5bf07a0ab10ef 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i8( define @intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i8( define @intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; 
CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmslt.mask.nxv4i8( define @intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmslt.mask.nxv8i8( define @intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmslt.mask.nxv16i8( define @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmslt.mask.nxv32i8( define @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i16( define @intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i16( define @intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ 
-450,10 +442,9 @@ declare @llvm.riscv.vmslt.mask.nxv4i16( define @intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmslt.mask.nxv8i16( define @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmslt.mask.nxv16i16( define @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i32( define @intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmslt.mask.nxv2i32( define @intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmslt.mask.nxv4i32( define @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i32( @@ -762,12 +748,11 @@ declare 
@llvm.riscv.vmslt.mask.nxv8i32( define @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmslt.mask.nxv1i64( define @intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmslt.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmslt.vv v0, v8, v9 ; CHECK-NEXT: vmslt.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmslt.mask.nxv2i64( define @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmslt.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v10 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmslt.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmslt.mask.nxv4i64( define @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmslt.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmslt.vv v0, v8, v12 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmslt.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmslt.nxv4i64( @@ -1692,11 +1674,11 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmslt.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmslt.vv v11, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index 2a92357b386da..2068ec28fbe99 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i8( define @intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; 
CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i8( define @intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsltu.mask.nxv4i8( define @intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsltu.mask.nxv8i8( define @intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsltu.mask.nxv16i8( define @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsltu.mask.nxv32i8( define @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i16( define @intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v 
v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i16( define @intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsltu.mask.nxv4i16( define @intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsltu.mask.nxv8i16( define @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsltu.mask.nxv16i16( define @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i32( define @intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare @llvm.riscv.vmsltu.mask.nxv2i32( define @intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare 
@llvm.riscv.vmsltu.mask.nxv4i32( define @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsltu.mask.nxv8i32( define @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsltu.mask.nxv1i64( define @intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsltu.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsltu.vv v0, v8, v9 ; CHECK-NEXT: vmsltu.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsltu.mask.nxv2i64( define @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsltu.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v10 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsltu.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsltu.mask.nxv4i64( define @intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsltu.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsltu.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsltu.nxv4i64( @@ -1692,11 +1674,11 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsltu.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsltu.vv v11, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; 
RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index 27f2dfea36b4c..88a09e0013d9f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -34,10 +34,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i8( define @intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i8( define @intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare @llvm.riscv.vmsne.mask.nxv4i8( define @intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare @llvm.riscv.vmsne.mask.nxv8i8( define @intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare @llvm.riscv.vmsne.mask.nxv16i8( define @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsne.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i8( @@ -294,12 +289,11 @@ declare @llvm.riscv.vmsne.mask.nxv32i8( define @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv32i8( @@ -346,10 +340,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i16( define @intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i16( define @intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare @llvm.riscv.vmsne.mask.nxv4i16( define @intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 +493,11 @@ declare @llvm.riscv.vmsne.mask.nxv8i16( define @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsne.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i16( @@ -554,12 +544,11 @@ declare @llvm.riscv.vmsne.mask.nxv16i16( define @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv16i16( @@ -606,10 +595,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i32( define @intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 
+646,9 @@ declare @llvm.riscv.vmsne.mask.nxv2i32( define @intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare @llvm.riscv.vmsne.mask.nxv4i32( define @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsne.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv4i32( @@ -762,12 +748,11 @@ declare @llvm.riscv.vmsne.mask.nxv8i32( define @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv8i32( @@ -814,10 +799,9 @@ declare @llvm.riscv.vmsne.mask.nxv1i64( define @intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsne.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsne.vv v0, v8, v9 ; CHECK-NEXT: vmsne.vv v11, v9, v10, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare @llvm.riscv.vmsne.mask.nxv2i64( define @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsne.vv v2, v8, v10 -; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v10 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vmsne.vv v4, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call @llvm.riscv.vmsne.nxv2i64( @@ -918,12 +901,11 @@ declare @llvm.riscv.vmsne.mask.nxv4i64( define @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsne.vv v4, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmsne.vv v0, v8, v12 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmsne.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret entry: %mask = call 
@llvm.riscv.vmsne.nxv4i64( @@ -1692,11 +1674,11 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsne.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsne.vv v11, v8, v10, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll index 8b1660283cb7d..d0f2ce1ca8004 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-int.ll @@ -450,14 +450,14 @@ define @test_vp_reverse_nxv64i8_masked( %sr ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v24, v16, a2 +; CHECK-NEXT: vrsub.vx v16, v16, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 +; CHECK-NEXT: vrgatherei16.vv v28, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v24, v12, v16 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t +; CHECK-NEXT: vslidedown.vx v8, v24, a1, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv64i8( %src, %mask, i32 %evl) ret %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll index a30ebf2d33b50..9b99cfe39b574 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask-fixed-vectors.ll @@ -7,15 +7,15 @@ define <2 x i1> @test_vp_reverse_v2i1_masked(<2 x i1> %src, <2 x i1> %mask, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v11, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vrsub.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v11, v9, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call <2 x i1> @llvm.experimental.vp.reverse.v2i1(<2 x i1> %src, <2 x i1> %mask, i32 %evl) ret <2 x i1> %dst @@ -44,15 +44,15 @@ define <4 x i1> @test_vp_reverse_v4i1_masked(<4 x i1> %src, <4 x i1> %mask, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v11, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vrsub.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v11, v9, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; 
CHECK-NEXT: ret %dst = call <4 x i1> @llvm.experimental.vp.reverse.v4i1(<4 x i1> %src, <4 x i1> %mask, i32 %evl) ret <4 x i1> %dst @@ -81,15 +81,15 @@ define <8 x i1> @test_vp_reverse_v8i1_masked(<8 x i1> %src, <8 x i1> %mask, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v11, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vrsub.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v11, v9, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call <8 x i1> @llvm.experimental.vp.reverse.v8i1(<8 x i1> %src, <8 x i1> %mask, i32 %evl) ret <8 x i1> %dst @@ -118,15 +118,15 @@ define <16 x i1> @test_vp_reverse_v16i1_masked(<16 x i1> %src, <16 x i1> %mask, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v9, v12, v10, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %dst = call <16 x i1> @llvm.experimental.vp.reverse.v16i1(<16 x i1> %src, <16 x i1> %mask, i32 %evl) ret <16 x i1> %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index 6522f0e9efe65..b8d6fa45e88b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -6,15 +6,15 @@ define @test_vp_reverse_nxv1i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v11, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vrsub.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v11, v9, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv1i1( %src, %mask, i32 %evl) ret %dst @@ -43,15 +43,15 @@ define @test_vp_reverse_nxv2i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v11, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vrsub.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, 
v11, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v11, v9, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv2i1( %src, %mask, i32 %evl) ret %dst @@ -80,15 +80,15 @@ define @test_vp_reverse_nxv4i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v11, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vid.v v9, v0.t ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t +; CHECK-NEXT: vrsub.vx v9, v9, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v11, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v11, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v11, v9, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv4i1( %src, %mask, i32 %evl) ret %dst @@ -117,15 +117,15 @@ define @test_vp_reverse_nxv8i1_masked( %src, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vid.v v10, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v10, v10, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vrgatherei16.vv v12, v9, v10, v0.t -; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v9, v12, v10, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv8i1( %src, %mask, i32 %evl) ret %dst @@ -154,15 +154,15 @@ define @test_vp_reverse_nxv16i1_masked( %sr ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vid.v v12, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v16, v10, v12, v0.t -; CHECK-NEXT: vmsne.vi v0, v16, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v10, v16, v12, v0.t +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv16i1( %src, %mask, i32 %evl) ret %dst @@ -191,15 +191,15 @@ define @test_vp_reverse_nxv32i1_masked( %sr ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v24, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vid.v v16, v0.t ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v24, v12, v16, v0.t -; CHECK-NEXT: vmsne.vi v0, v24, 0, v0.t +; CHECK-NEXT: vrgatherei16.vv v12, v24, v16, v0.t +; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv32i1( %src, %mask, i32 %evl) ret %dst @@ -226,24 +226,25 @@ define @test_vp_reverse_nxv32i1( %src, i32 define @test_vp_reverse_nxv64i1_masked( %src, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_vp_reverse_nxv64i1_masked: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli 
zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a2, a1, 2 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: vsetvli a3, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vrsub.vx v0, v16, a2 +; CHECK-NEXT: vrsub.vx v24, v16, a2 ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v20, v24, v0 -; CHECK-NEXT: vrgatherei16.vv v16, v28, v0 +; CHECK-NEXT: vrgatherei16.vv v20, v8, v24 +; CHECK-NEXT: vrgatherei16.vv v16, v12, v24 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub a1, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vslidedown.vx v16, v16, a1, v0.t -; CHECK-NEXT: vmsne.vi v0, v16, 0, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %dst = call @llvm.experimental.vp.reverse.nxv64i1( %src, %mask, i32 %evl) ret %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll index ce0ae2022885a..b0ea1fa591159 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-fixed-vectors.ll @@ -10,15 +10,15 @@ declare <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1>, <16 x i1>, i32, define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -34,15 +34,15 @@ define <2 x i1> @test_vp_splice_v2i1(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %ev define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -58,23 +58,23 @@ define <2 x i1> @test_vp_splice_v2i1_negative_offset(<2 x i1> %va, <2 x i1> %vb, define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v2i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: 
vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call <2 x i1> @llvm.experimental.vp.splice.v2i1(<2 x i1> %va, <2 x i1> %vb, i32 5, <2 x i1> %mask, i32 %evla, i32 %evlb) ret <2 x i1> %v @@ -83,15 +83,15 @@ define <2 x i1> @test_vp_splice_v2i1_masked(<2 x i1> %va, <2 x i1> %vb, <2 x i1> define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -107,15 +107,15 @@ define <4 x i1> @test_vp_splice_v4i1(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %ev define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v4i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -131,23 +131,23 @@ define <4 x i1> @test_vp_splice_v4i1_negative_offset(<4 x i1> %va, <4 x i1> %vb, define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v4i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli 
zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call <4 x i1> @llvm.experimental.vp.splice.v4i1(<4 x i1> %va, <4 x i1> %vb, i32 5, <4 x i1> %mask, i32 %evla, i32 %evlb) ret <4 x i1> %v @@ -156,15 +156,15 @@ define <4 x i1> @test_vp_splice_v4i1_masked(<4 x i1> %va, <4 x i1> %vb, <4 x i1> define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -180,15 +180,15 @@ define <8 x i1> @test_vp_splice_v8i1(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %ev define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v8i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -204,23 +204,23 @@ define <8 x i1> @test_vp_splice_v8i1_negative_offset(<8 x i1> %va, <8 x i1> %vb, define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v8i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, 
a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.experimental.vp.splice.v8i1(<8 x i1> %va, <8 x i1> %vb, i32 5, <8 x i1> %mask, i32 %evla, i32 %evlb) ret <8 x i1> %v @@ -229,15 +229,15 @@ define <8 x i1> @test_vp_splice_v8i1_masked(<8 x i1> %va, <8 x i1> %vb, <8 x i1> define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -253,15 +253,15 @@ define <16 x i1> @test_vp_splice_v16i1(<16 x i1> %va, <16 x i1> %vb, i32 zeroext define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v16i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -277,23 +277,23 @@ define <16 x i1> @test_vp_splice_v16i1_negative_offset(<16 x i1> %va, <16 x i1> define <16 x i1> @test_vp_splice_v16i1_masked(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_v16i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vslideup.vx 
v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call <16 x i1> @llvm.experimental.vp.splice.v16i1(<16 x i1> %va, <16 x i1> %vb, i32 5, <16 x i1> %mask, i32 %evla, i32 %evlb) ret <16 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 815cb1f57631c..52e4f115c837d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -13,15 +13,15 @@ declare @llvm.experimental.vp.splice.nxv64i1( @test_vp_splice_nxv1i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -37,15 +37,15 @@ define @test_vp_splice_nxv1i1( %va, @test_vp_splice_nxv1i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv1i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, mf8, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -61,23 +61,23 @@ define @test_vp_splice_nxv1i1_negative_offset( @test_vp_splice_nxv1i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv1i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, 
v9, 0, v0.t ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv1i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v @@ -86,15 +86,15 @@ define @test_vp_splice_nxv1i1_masked( %va, @test_vp_splice_nxv2i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -110,15 +110,15 @@ define @test_vp_splice_nxv2i1( %va, @test_vp_splice_nxv2i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -134,23 +134,23 @@ define @test_vp_splice_nxv2i1_negative_offset( @test_vp_splice_nxv2i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv2i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv2i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v @@ -159,15 +159,15 @@ define @test_vp_splice_nxv2i1_masked( %va, @test_vp_splice_nxv4i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv4i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: 
vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -183,15 +183,15 @@ define @test_vp_splice_nxv4i1( %va, @test_vp_splice_nxv4i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv4i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -207,23 +207,23 @@ define @test_vp_splice_nxv4i1_negative_offset( @test_vp_splice_nxv4i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv4i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv4i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v @@ -232,15 +232,15 @@ define @test_vp_splice_nxv4i1_masked( %va, @test_vp_splice_nxv8i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v9, v9, 5 @@ -256,15 +256,15 @@ define @test_vp_splice_nxv8i1( 
%va, @test_vp_splice_nxv8i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv8i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vx v9, v9, a0 @@ -280,23 +280,23 @@ define @test_vp_splice_nxv8i1_negative_offset( @test_vp_splice_nxv8i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv8i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v10, v10, 5, v0.t +; CHECK-NEXT: vslidedown.vi v9, v10, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu -; CHECK-NEXT: vslideup.vx v10, v8, a0, v0.t +; CHECK-NEXT: vslideup.vx v9, v8, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v9, 0, v0.t ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv8i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v @@ -305,21 +305,21 @@ define @test_vp_splice_nxv8i1_masked( %va, @test_vp_splice_nxv16i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 +; CHECK-NEXT: vslidedown.vi v10, v10, 5 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vx v8, v10, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vslideup.vx v10, v8, a0 +; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) @@ -329,21 +329,21 @@ define @test_vp_splice_nxv16i1( %va, @test_vp_splice_nxv16i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv16i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: 
vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, m2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslidedown.vx v10, v10, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 5 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vslideup.vi v10, v8, 5 +; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv16i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) @@ -353,15 +353,15 @@ define @test_vp_splice_nxv16i1_negative_offset( @test_vp_splice_nxv16i1_masked( %va, %vb, %mask, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv16i1_masked: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v14, 0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v10, v14, 1, v0 +; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 @@ -378,21 +378,21 @@ define @test_vp_splice_nxv16i1_masked( %va, define @test_vp_splice_nxv32i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 +; CHECK-NEXT: vslidedown.vi v12, v12, 5 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vx v8, v12, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vslideup.vx v12, v8, a0 +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) @@ -402,21 +402,21 @@ define @test_vp_splice_nxv32i1( %va, @test_vp_splice_nxv32i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv32i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; 
CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, m4, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslidedown.vx v12, v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 5 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vslideup.vi v12, v8, 5 +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) @@ -430,19 +430,19 @@ define @test_vp_splice_nxv32i1_masked( %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v16, v16, 5, v0.t +; CHECK-NEXT: vslidedown.vi v12, v12, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: vslideup.vx v16, v12, a0, v0.t +; CHECK-NEXT: vslideup.vx v12, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vmsne.vi v0, v16, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv32i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v @@ -451,21 +451,21 @@ define @test_vp_splice_nxv32i1_masked( %va, define @test_vp_splice_nxv64i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv64i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 5 +; CHECK-NEXT: vslidedown.vi v16, v16, 5 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vslideup.vx v16, v8, a0 +; CHECK-NEXT: vmsne.vi v0, v16, 0 ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 5, splat (i1 1), i32 %evla, i32 %evlb) @@ -475,21 +475,21 @@ define @test_vp_splice_nxv64i1( %va, @test_vp_splice_nxv64i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) { ; CHECK-LABEL: test_vp_splice_nxv64i1_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v8, v24, 1, v0 +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmv1r.v 
v0, v24 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetivli zero, 5, e8, m8, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: vslidedown.vx v16, v16, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vslideup.vi v8, v16, 5 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vslideup.vi v16, v8, 5 +; CHECK-NEXT: vmsne.vi v0, v16, 0 ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 -5, splat (i1 1), i32 %evla, i32 %evlb) @@ -503,19 +503,19 @@ define @test_vp_splice_nxv64i1_masked( %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmerge.vim v24, v16, 1, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 +; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: addi a0, a0, -5 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vslidedown.vi v24, v24, 5, v0.t +; CHECK-NEXT: vslidedown.vi v16, v16, 5, v0.t ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: vslideup.vx v24, v16, a0, v0.t +; CHECK-NEXT: vslideup.vx v16, v24, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v24, 0, v0.t +; CHECK-NEXT: vmsne.vi v0, v16, 0, v0.t ; CHECK-NEXT: ret %v = call @llvm.experimental.vp.splice.nxv64i1( %va, %vb, i32 5, %mask, i32 %evla, i32 %evlb) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index c86fee6305931..010b3a9159bc9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -254,7 +254,6 @@ declare @llvm.vp.gather.nxv32i8.nxv32p0(, define @vpgather_baseidx_nxv32i8(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv32i8: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a2, a3, 1 ; RV32-NEXT: sub a4, a1, a2 @@ -263,6 +262,7 @@ define @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, @vpgather_baseidx_nxv32i8(ptr %base, @llvm.vp.gather.nxv16f64.nxv16p0( @vpgather_nxv16f64( %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 ; RV32-NEXT: sltu a3, a0, a2 @@ -2272,6 +2272,7 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 @@ -2324,6 +2324,7 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs @@ -2370,7 +2371,6 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 @@ -2381,6 +2381,7 @@ define 
@vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: srli a4, a2, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v8, v0 ; RV32-NEXT: vslidedown.vx v0, v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t @@ -2389,17 +2390,16 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB104_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 ; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 +; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: sub a3, a1, a2 @@ -2408,6 +2408,7 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: srli a4, a2, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t @@ -2416,8 +2417,8 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB104_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2428,7 +2429,6 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vzext.vf2 v16, v8 ; RV32-NEXT: vsll.vi v24, v16, 3 @@ -2439,6 +2439,7 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: and a3, a4, a3 ; RV32-NEXT: srli a4, a2, 3 ; RV32-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV32-NEXT: vmv1r.v v8, v0 ; RV32-NEXT: vslidedown.vx v0, v0, a4 ; RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t @@ -2447,13 +2448,12 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB105_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV64-NEXT: vzext.vf2 v16, v8 ; RV64-NEXT: vsll.vi v24, v16, 3 @@ -2464,6 +2464,7 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: and a3, a4, a3 ; RV64-NEXT: srli a4, a2, 3 ; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vmv1r.v v8, v0 ; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t @@ -2472,7 +2473,7 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB105_2: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmv1r.v v0, v8 ; 
RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t ; RV64-NEXT: ret %eidxs = zext %idxs to diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index f07c16476c56a..27dbd4ac1b7c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -437,7 +437,6 @@ declare @llvm.vp.load.nxv16f64.p0(ptr, define @vpload_nxv16f64(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpload_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: sub a3, a1, a2 ; CHECK-NEXT: sltu a4, a1, a3 @@ -447,6 +446,7 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: srli a5, a2, 3 ; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v8, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t @@ -477,37 +477,37 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @vpload_nxv17f64(ptr %ptr, ptr %out, @vpmerge_vv_nxv128i8( %va, @vpmerge_vv_nxv128i8( %va, @vpmerge_vv_nxv128i8( %va, @vpmerge_vx_nxv128i8(i8 %a, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vx_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: csrr a1, vlenb @@ -415,7 +424,7 @@ define @vpmerge_vx_nxv128i8(i8 %a, %vb, ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB29_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %a, i32 0 @@ -427,7 +436,7 @@ define @vpmerge_vx_nxv128i8(i8 %a, %vb, define @vpmerge_vi_nxv128i8( %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb @@ -443,7 +452,7 @@ define @vpmerge_vi_nxv128i8( %vb, @llvm.vp.merge.nxv128i8( %m, splat (i8 2), %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index c12fc0497742a..4ea104d98de54 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -400,7 +400,6 @@ define void @vpstore_nxv17f64( %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, %val, ptr %ptr, %m, i32 %evl) ret void diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll index 4f7cb84c08644..d0549c98e1baa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll @@ -178,7 +178,7 @@ define half @vpreduce_fadd_nxv64f16(half %s, %v, %v, %v, %v, % ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: srli a2, a3, 2 ; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV32-NEXT: vslidedown.vx v24, v0, a2 +; RV32-NEXT: vslidedown.vx v25, v0, a2 ; RV32-NEXT: slli a3, a3, 1 ; RV32-NEXT: sub a2, a1, a3 ; RV32-NEXT: sltu a4, a1, a2 @@ -1162,13 +1162,13 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, % ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: .LBB67_2: ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.s.x v25, a0 +; RV32-NEXT: vmv.s.x v24, a0 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vredmaxu.vs v25, v8, v25, v0.t +; RV32-NEXT: vredmaxu.vs v24, v8, v24, v0.t ; RV32-NEXT: vsetvli zero, a2, e32, m8, 
ta, ma -; RV32-NEXT: vmv1r.v v0, v24 -; RV32-NEXT: vredmaxu.vs v25, v16, v25, v0.t -; RV32-NEXT: vmv.x.s a0, v25 +; RV32-NEXT: vmv1r.v v0, v25 +; RV32-NEXT: vredmaxu.vs v24, v16, v24, v0.t +; RV32-NEXT: vmv.x.s a0, v24 ; RV32-NEXT: ret ; ; RV64-LABEL: vpreduce_umax_nxv32i32: @@ -1176,25 +1176,25 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, % ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: srli a2, a3, 2 ; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a2 -; RV64-NEXT: andi a2, a0, -1 +; RV64-NEXT: vslidedown.vx v25, v0, a2 +; RV64-NEXT: andi a0, a0, -1 ; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: sub a0, a1, a3 -; RV64-NEXT: sltu a4, a1, a0 +; RV64-NEXT: sub a2, a1, a3 +; RV64-NEXT: sltu a4, a1, a2 ; RV64-NEXT: addi a4, a4, -1 -; RV64-NEXT: and a0, a4, a0 +; RV64-NEXT: and a2, a4, a2 ; RV64-NEXT: bltu a1, a3, .LBB67_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a3 ; RV64-NEXT: .LBB67_2: ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64-NEXT: vmv.s.x v25, a2 +; RV64-NEXT: vmv.s.x v24, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-NEXT: vredmaxu.vs v25, v8, v25, v0.t -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vmv1r.v v0, v24 -; RV64-NEXT: vredmaxu.vs v25, v16, v25, v0.t -; RV64-NEXT: vmv.x.s a0, v25 +; RV64-NEXT: vredmaxu.vs v24, v8, v24, v0.t +; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV64-NEXT: vmv1r.v v0, v25 +; RV64-NEXT: vredmaxu.vs v24, v16, v24, v0.t +; RV64-NEXT: vmv.x.s a0, v24 ; RV64-NEXT: ret %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, %v, %m, i32 %evl) ret i32 %r diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll index f9ea5143cfcb7..5adf48f18f999 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-vp.ll @@ -572,7 +572,7 @@ declare @llvm.vp.sadd.sat.nxv128i8(, @vsadd_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb @@ -588,7 +588,7 @@ define @vsadd_vi_nxv128i8( %va, @llvm.vp.sadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1350,10 +1350,10 @@ declare @llvm.vp.sadd.sat.nxv32i32(, @vsadd_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vi_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -1367,7 +1367,7 @@ define @vsadd_vi_nxv32i32( %va, @llvm.vp.sadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll index 745b93b257085..7581e3dc1eff5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-vp.ll @@ -571,7 +571,7 @@ declare @llvm.vp.uadd.sat.nxv128i8(, @vsaddu_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsaddu_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a0, vlenb @@ -587,7 +587,7 @@ define @vsaddu_vi_nxv128i8( %va, @llvm.vp.uadd.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1349,10 +1349,10 @@ declare @llvm.vp.uadd.sat.nxv32i32(, 
@vsaddu_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsaddu_vi_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -1366,7 +1366,7 @@ define @vsaddu_vi_nxv32i32( %va, @llvm.vp.uadd.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index a14ce71726153..97455786b82d6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -152,45 +152,45 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users: ; NO_FOLDING: # %bb.0: ; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu -; NO_FOLDING-NEXT: vlm.v v8, (a0) -; NO_FOLDING-NEXT: vlm.v v9, (a1) -; NO_FOLDING-NEXT: vlm.v v10, (a2) -; NO_FOLDING-NEXT: vmv.v.i v11, 0 -; NO_FOLDING-NEXT: vmv.v.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 -; NO_FOLDING-NEXT: vmv.v.v v0, v9 -; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; NO_FOLDING-NEXT: vlm.v v0, (a0) +; NO_FOLDING-NEXT: vlm.v v10, (a1) +; NO_FOLDING-NEXT: vlm.v v12, (a2) +; NO_FOLDING-NEXT: vmv.v.i v8, 0 +; NO_FOLDING-NEXT: vmerge.vim v9, v8, -1, v0 +; NO_FOLDING-NEXT: vmv.v.v v11, v0 ; NO_FOLDING-NEXT: vmv.v.v v0, v10 -; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 -; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 +; NO_FOLDING-NEXT: vmerge.vim v10, v8, -1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v12 +; NO_FOLDING-NEXT: vmerge.vim v8, v8, -1, v0 +; NO_FOLDING-NEXT: vmul.vv v10, v9, v10 ; NO_FOLDING-NEXT: li a0, 1 -; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 -; NO_FOLDING-NEXT: vmv.v.v v0, v8 -; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t -; NO_FOLDING-NEXT: vor.vv v8, v9, v10 -; NO_FOLDING-NEXT: vor.vv v8, v8, v11 +; NO_FOLDING-NEXT: vsub.vv v9, v9, v8 +; NO_FOLDING-NEXT: vmv.v.v v0, v11 +; NO_FOLDING-NEXT: vsub.vx v8, v8, a0, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v10, v8 +; NO_FOLDING-NEXT: vor.vv v8, v8, v9 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users: ; FOLDING: # %bb.0: ; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu -; FOLDING-NEXT: vlm.v v8, (a0) -; FOLDING-NEXT: vlm.v v9, (a1) -; FOLDING-NEXT: vlm.v v10, (a2) -; FOLDING-NEXT: vmv.v.i v11, 0 -; FOLDING-NEXT: vmv.v.v v0, v8 -; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 -; FOLDING-NEXT: vmv.v.v v0, v9 -; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; FOLDING-NEXT: vlm.v v0, (a0) +; FOLDING-NEXT: vlm.v v10, (a1) +; FOLDING-NEXT: vlm.v v12, (a2) +; FOLDING-NEXT: vmv.v.i v8, 0 +; FOLDING-NEXT: vmerge.vim v9, v8, -1, v0 +; FOLDING-NEXT: vmv.v.v v11, v0 ; FOLDING-NEXT: vmv.v.v v0, v10 -; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 -; FOLDING-NEXT: vmul.vv v9, v12, v9 +; FOLDING-NEXT: vmerge.vim v10, v8, -1, v0 +; FOLDING-NEXT: vmv.v.v v0, v12 +; FOLDING-NEXT: vmerge.vim v8, v8, -1, v0 +; FOLDING-NEXT: vmul.vv v10, v9, v10 ; FOLDING-NEXT: li a0, 1 -; FOLDING-NEXT: vsub.vv v11, v12, v10 -; FOLDING-NEXT: vmv.v.v v0, v8 -; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t -; FOLDING-NEXT: vor.vv v8, v9, v10 -; FOLDING-NEXT: vor.vv v8, v8, v11 +; FOLDING-NEXT: vsub.vv v9, v9, v8 +; FOLDING-NEXT: vmv.v.v v0, v11 +; FOLDING-NEXT: vsub.vx v8, v8, a0, v0.t +; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: 
vor.vv v8, v8, v9 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y @@ -210,45 +210,45 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users: ; NO_FOLDING: # %bb.0: ; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu -; NO_FOLDING-NEXT: vlm.v v8, (a0) -; NO_FOLDING-NEXT: vlm.v v9, (a1) -; NO_FOLDING-NEXT: vlm.v v10, (a2) -; NO_FOLDING-NEXT: vmv.v.i v11, 0 -; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 -; NO_FOLDING-NEXT: vmv1r.v v0, v9 -; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; NO_FOLDING-NEXT: vlm.v v0, (a0) +; NO_FOLDING-NEXT: vlm.v v10, (a1) +; NO_FOLDING-NEXT: vlm.v v12, (a2) +; NO_FOLDING-NEXT: vmv.v.i v8, 0 +; NO_FOLDING-NEXT: vmerge.vim v9, v8, -1, v0 +; NO_FOLDING-NEXT: vmv1r.v v11, v0 ; NO_FOLDING-NEXT: vmv1r.v v0, v10 -; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 -; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 +; NO_FOLDING-NEXT: vmerge.vim v10, v8, -1, v0 +; NO_FOLDING-NEXT: vmv1r.v v0, v12 +; NO_FOLDING-NEXT: vmerge.vim v8, v8, -1, v0 +; NO_FOLDING-NEXT: vmul.vv v10, v9, v10 ; NO_FOLDING-NEXT: li a0, 1 -; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 -; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t -; NO_FOLDING-NEXT: vor.vv v8, v9, v10 -; NO_FOLDING-NEXT: vor.vv v8, v8, v11 +; NO_FOLDING-NEXT: vsub.vv v9, v9, v8 +; NO_FOLDING-NEXT: vmv1r.v v0, v11 +; NO_FOLDING-NEXT: vsub.vx v8, v8, a0, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v10, v8 +; NO_FOLDING-NEXT: vor.vv v8, v8, v9 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users: ; FOLDING: # %bb.0: ; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu -; FOLDING-NEXT: vlm.v v8, (a0) -; FOLDING-NEXT: vlm.v v9, (a1) -; FOLDING-NEXT: vlm.v v10, (a2) -; FOLDING-NEXT: vmv.v.i v11, 0 -; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 -; FOLDING-NEXT: vmv1r.v v0, v9 -; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; FOLDING-NEXT: vlm.v v0, (a0) +; FOLDING-NEXT: vlm.v v10, (a1) +; FOLDING-NEXT: vlm.v v12, (a2) +; FOLDING-NEXT: vmv.v.i v8, 0 +; FOLDING-NEXT: vmerge.vim v9, v8, -1, v0 +; FOLDING-NEXT: vmv1r.v v11, v0 ; FOLDING-NEXT: vmv1r.v v0, v10 -; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 -; FOLDING-NEXT: vmul.vv v9, v12, v9 +; FOLDING-NEXT: vmerge.vim v10, v8, -1, v0 +; FOLDING-NEXT: vmv1r.v v0, v12 +; FOLDING-NEXT: vmerge.vim v8, v8, -1, v0 +; FOLDING-NEXT: vmul.vv v10, v9, v10 ; FOLDING-NEXT: li a0, 1 -; FOLDING-NEXT: vsub.vv v11, v12, v10 -; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t -; FOLDING-NEXT: vor.vv v8, v9, v10 -; FOLDING-NEXT: vor.vv v8, v8, v11 +; FOLDING-NEXT: vsub.vv v9, v9, v8 +; FOLDING-NEXT: vmv1r.v v0, v11 +; FOLDING-NEXT: vsub.vx v8, v8, a0, v0.t +; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: vor.vv v8, v8, v9 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y @@ -448,16 +448,16 @@ define @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING: # %bb.0: ; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu ; NO_FOLDING-NEXT: vlm.v v0, (a0) -; NO_FOLDING-NEXT: vlm.v v8, (a2) -; NO_FOLDING-NEXT: vlm.v v9, (a1) -; NO_FOLDING-NEXT: vmv.v.i v10, 0 -; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; NO_FOLDING-NEXT: vmv.v.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 -; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 -; NO_FOLDING-NEXT: vmv.v.v v0, v9 -; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; NO_FOLDING-NEXT: vlm.v v10, (a2) +; NO_FOLDING-NEXT: 
vlm.v v11, (a1) +; NO_FOLDING-NEXT: vmv.v.i v8, 0 +; NO_FOLDING-NEXT: vmerge.vim v9, v8, 1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v10 +; NO_FOLDING-NEXT: vmerge.vim v8, v8, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v9, v8 +; NO_FOLDING-NEXT: vsub.vv v8, v9, v8 +; NO_FOLDING-NEXT: vmv.v.v v0, v11 +; NO_FOLDING-NEXT: vor.vv v10, v10, v9, v0.t ; NO_FOLDING-NEXT: vor.vv v8, v10, v8 ; NO_FOLDING-NEXT: ret ; @@ -465,16 +465,16 @@ define @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ; FOLDING: # %bb.0: ; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu ; FOLDING-NEXT: vlm.v v0, (a0) -; FOLDING-NEXT: vlm.v v8, (a2) -; FOLDING-NEXT: vlm.v v9, (a1) -; FOLDING-NEXT: vmv.v.i v10, 0 -; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; FOLDING-NEXT: vmv.v.v v0, v8 -; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; FOLDING-NEXT: vadd.vv v10, v11, v8 -; FOLDING-NEXT: vsub.vv v8, v11, v8 -; FOLDING-NEXT: vmv.v.v v0, v9 -; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; FOLDING-NEXT: vlm.v v10, (a2) +; FOLDING-NEXT: vlm.v v11, (a1) +; FOLDING-NEXT: vmv.v.i v8, 0 +; FOLDING-NEXT: vmerge.vim v9, v8, 1, v0 +; FOLDING-NEXT: vmv.v.v v0, v10 +; FOLDING-NEXT: vmerge.vim v8, v8, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v9, v8 +; FOLDING-NEXT: vsub.vv v8, v9, v8 +; FOLDING-NEXT: vmv.v.v v0, v11 +; FOLDING-NEXT: vor.vv v10, v10, v9, v0.t ; FOLDING-NEXT: vor.vv v8, v10, v8 ; FOLDING-NEXT: ret %a = load , ptr %x @@ -496,16 +496,16 @@ define @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING: # %bb.0: ; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; NO_FOLDING-NEXT: vlm.v v0, (a0) -; NO_FOLDING-NEXT: vlm.v v8, (a2) -; NO_FOLDING-NEXT: vlm.v v9, (a1) -; NO_FOLDING-NEXT: vmv.v.i v10, 0 -; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 -; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 -; NO_FOLDING-NEXT: vmv1r.v v0, v9 -; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; NO_FOLDING-NEXT: vlm.v v10, (a2) +; NO_FOLDING-NEXT: vlm.v v11, (a1) +; NO_FOLDING-NEXT: vmv.v.i v8, 0 +; NO_FOLDING-NEXT: vmerge.vim v9, v8, 1, v0 +; NO_FOLDING-NEXT: vmv1r.v v0, v10 +; NO_FOLDING-NEXT: vmerge.vim v8, v8, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v9, v8 +; NO_FOLDING-NEXT: vsub.vv v8, v9, v8 +; NO_FOLDING-NEXT: vmv1r.v v0, v11 +; NO_FOLDING-NEXT: vor.vv v10, v10, v9, v0.t ; NO_FOLDING-NEXT: vor.vv v8, v10, v8 ; NO_FOLDING-NEXT: ret ; @@ -513,16 +513,16 @@ define @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING: # %bb.0: ; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; FOLDING-NEXT: vlm.v v0, (a0) -; FOLDING-NEXT: vlm.v v8, (a2) -; FOLDING-NEXT: vlm.v v9, (a1) -; FOLDING-NEXT: vmv.v.i v10, 0 -; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; FOLDING-NEXT: vadd.vv v10, v11, v8 -; FOLDING-NEXT: vsub.vv v8, v11, v8 -; FOLDING-NEXT: vmv1r.v v0, v9 -; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; FOLDING-NEXT: vlm.v v10, (a2) +; FOLDING-NEXT: vlm.v v11, (a1) +; FOLDING-NEXT: vmv.v.i v8, 0 +; FOLDING-NEXT: vmerge.vim v9, v8, 1, v0 +; FOLDING-NEXT: vmv1r.v v0, v10 +; FOLDING-NEXT: vmerge.vim v8, v8, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v9, v8 +; FOLDING-NEXT: vsub.vv v8, v9, v8 +; FOLDING-NEXT: vmv1r.v v0, v11 +; FOLDING-NEXT: vor.vv v10, v10, v9, v0.t ; FOLDING-NEXT: vor.vv v8, v10, v8 ; FOLDING-NEXT: ret %a = load , ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index 
d4b4cb503c76e..b663e3bde9ded 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -211,12 +211,12 @@ define @vfmerge_fv_nxv32f16( %va, half ; CHECK-ZVFHMIN: # %bb.0: ; CHECK-ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 ; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v24, fa5 +; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 ; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfncvt.f.f.w v16, v24 -; CHECK-ZVFHMIN-NEXT: vmv.v.v v20, v16 +; CHECK-ZVFHMIN-NEXT: vfncvt.f.f.w v24, v16 +; CHECK-ZVFHMIN-NEXT: vmv.v.v v28, v24 ; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -458,17 +458,16 @@ define @vselect_combine_regression( %v ; CHECK-NEXT: vl8re64.v v8, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v16, 0 +; CHECK-NEXT: vmseq.vi v24, v16, 0 ; CHECK-NEXT: vmseq.vi v0, v0, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmv.v.i v16, 0 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 -; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -484,15 +483,16 @@ define void @vselect_legalize_regression( %a, @select_nxv32i32( %a, @select_nxv32i32( %a, @select_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_evl_nxv32i32( %a, @select_nxv16f64( %a, @llvm.vp.sext.nxv32i32.nxv32i8(, < define @vsext_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vsext_nxv32i8_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -168,7 +168,7 @@ define @vsext_nxv32i8_nxv32i32( %a, @llvm.vp.sitofp.nxv32f16.nxv32i32( @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vsitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v24, v0 ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; ZVFH-NEXT: vmv1r.v v25, v0 ; ZVFH-NEXT: vslidedown.vx v0, v0, a2 ; ZVFH-NEXT: slli a1, a1, 1 ; ZVFH-NEXT: sub a2, a0, a1 @@ -403,17 +403,17 @@ define @vsitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB25_2: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v24 +; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfncvt.f.x.w v24, v8, v0.t ; ZVFH-NEXT: vmv8r.v v8, v24 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vsitofp_nxv32f16_nxv32i32: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: srli a2, a1, 2 ; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: 
vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: sub a2, a0, a1 @@ -444,10 +444,10 @@ declare @llvm.vp.sitofp.nxv32f32.nxv32i32( @vsitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -461,7 +461,7 @@ define @vsitofp_nxv32f32_nxv32i32( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.sitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll index b56a0f40176cf..230125ceca5a6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-vp.ll @@ -590,7 +590,7 @@ declare @llvm.vp.ssub.sat.nxv128i8(, @vssub_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a2, vlenb @@ -607,7 +607,7 @@ define @vssub_vi_nxv128i8( %va, @llvm.vp.ssub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1392,10 +1392,10 @@ declare @llvm.vp.ssub.sat.nxv32i32(, @vssub_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vi_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a1, a2, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: sub a1, a0, a2 @@ -1410,7 +1410,7 @@ define @vssub_vi_nxv32i32( %va, @llvm.vp.ssub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll index 8275c3081c7c1..70a4e5e8362b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-vp.ll @@ -588,7 +588,7 @@ declare @llvm.vp.usub.sat.nxv128i8(, @vssubu_vi_nxv128i8( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vi_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) ; CHECK-NEXT: csrr a2, vlenb @@ -605,7 +605,7 @@ define @vssubu_vi_nxv128i8( %va, @llvm.vp.usub.sat.nxv128i8( %va, splat (i8 -1), %m, i32 %evl) @@ -1390,10 +1390,10 @@ declare @llvm.vp.usub.sat.nxv32i32(, @vssubu_vi_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vi_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a1, a2, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: slli a2, a2, 1 ; CHECK-NEXT: sub a1, a0, a2 @@ -1408,7 +1408,7 @@ define @vssubu_vi_nxv32i32( %va, @llvm.vp.usub.sat.nxv32i32( %va, splat (i32 -1), %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index 4857810e7a170..f848d9b1a91d3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -157,25 
+157,25 @@ declare @llvm.vp.trunc.nxv15i16.nxv15i64( define @vtrunc_nxv15i16_nxv15i64( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 ; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v28, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vnsrl.wi v18, v28, 0, v0.t +; CHECK-NEXT: vnsrl.wi v18, v24, 0, v0.t ; CHECK-NEXT: bltu a0, a1, .LBB12_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB12_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v28 ; CHECK-NEXT: vnsrl.wi v20, v8, 0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v20, 0, v0.t @@ -214,10 +214,10 @@ declare @llvm.vp.trunc.nxv32i7.nxv32i32(, define @vtrunc_nxv32i7_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v28, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -225,15 +225,15 @@ define @vtrunc_nxv32i7_nxv32i32( %a, @llvm.vp.trunc.nxv32i8.nxv32i32(, define @vtrunc_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_nxv32i8_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v28, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -259,15 +259,15 @@ define @vtrunc_nxv32i8_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @vtrunc_nxv32i64_nxv32i32( %a, @llvm.vp.uitofp.nxv32f16.nxv32i32( @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v24, v0 ; ZVFH-NEXT: csrr a1, vlenb ; ZVFH-NEXT: srli a2, a1, 2 ; ZVFH-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; ZVFH-NEXT: vmv1r.v v25, v0 ; ZVFH-NEXT: vslidedown.vx v0, v0, a2 ; ZVFH-NEXT: slli a1, a1, 1 ; ZVFH-NEXT: sub a2, a0, a1 @@ -403,17 +403,17 @@ define @vuitofp_nxv32f16_nxv32i32( %va, ; ZVFH-NEXT: mv a0, a1 ; ZVFH-NEXT: .LBB25_2: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v24 +; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfncvt.f.xu.w v24, v8, v0.t ; ZVFH-NEXT: vmv8r.v v8, v24 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vuitofp_nxv32f16_nxv32i32: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: srli a2, a1, 2 ; ZVFHMIN-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 ; ZVFHMIN-NEXT: slli a1, a1, 1 ; ZVFHMIN-NEXT: sub a2, a0, a1 @@ -444,10 +444,10 @@ declare @llvm.vp.uitofp.nxv32f32.nxv32i32( @vuitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, 
ta, ma +; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -461,7 +461,7 @@ define @vuitofp_nxv32f32_nxv32i32( %va, ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t ; CHECK-NEXT: ret %v = call @llvm.vp.uitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll index 400f89b1ef77d..47c4e35f264fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll @@ -151,10 +151,10 @@ declare @llvm.vp.zext.nxv32i32.nxv32i8(, < define @vzext_nxv32i8_nxv32i32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vzext_nxv32i8_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 ; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v13, v0 ; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: slli a1, a1, 1 ; CHECK-NEXT: sub a2, a0, a1 @@ -168,7 +168,7 @@ define @vzext_nxv32i8_nxv32i32( %a,