diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
index 998dfd956575d..2d9d3e350c493 100644
--- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt
+++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMVectorize
   Vectorize.cpp
   VectorCombine.cpp
   VPlan.cpp
+  VPlanCostModel.cpp
   VPlanHCFGBuilder.cpp
   VPlanRecipes.cpp
   VPlanSLP.cpp
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 3a35f3b754743..9660ce161cd5b 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -280,6 +280,9 @@ class LoopVectorizationPlanner {
 
   SmallVector<VPlanPtr, 4> VPlans;
 
+  /// Candidate VectorizationFactors for VPlans.
+  DenseMap<VPlan *, SmallVector<VectorizationFactor>> VFCandidates;
+
   /// A builder used to construct the current plan.
   VPBuilder Builder;
 
@@ -336,6 +339,21 @@ class LoopVectorizationPlanner {
   /// Check if the number of runtime checks exceeds the threshold.
   bool requiresTooManyRuntimeChecks() const;
 
+  /// \return The most profitable vectorization factor and the cost of that VF.
+  /// This method checks every VF in every plan in VPlans.
+  VectorizationFactor selectVectorizationFactor();
+
+  /// \return The most profitable vectorization factor and the cost of that VF
+  /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
+  /// epilogue vectorization is not supported for the loop.
+  VectorizationFactor
+  selectEpilogueVectorizationFactor(const ElementCount MaxVF);
+
+  /// Convenience function that returns the value of vscale_range iff
+  /// vscale_range.min == vscale_range.max or otherwise returns the value
+  /// returned by the corresponding TTI method.
+  std::optional<unsigned> getVScaleForTuning() const;
+
 protected:
   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
   /// according to the information gathered by Legal when it checked if it is
@@ -370,6 +388,25 @@ class LoopVectorizationPlanner {
   void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
                                   VPRecipeBuilder &RecipeBuilder,
                                   ElementCount MinVF);
+
+  /// Returns true when Factor A is more profitable than Factor B.
+  bool isMoreProfitable(const VectorizationFactor &A,
+                        const VectorizationFactor &B) const;
+
+  /// Determines if we have the infrastructure to vectorize the loop and its
+  /// epilogue, assuming the main loop is vectorized by \p VF.
+  bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
+
+  /// Returns true if epilogue vectorization is considered profitable, and
+  /// false otherwise.
+  /// \p VF is the vectorization factor chosen for the original loop.
+  bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
+
+  /// \return the VF candidates recorded for \p Plan; it must have an entry.
+  ArrayRef<VectorizationFactor> getVFCandidatesFor(VPlan &Plan) const {
+    assert(VFCandidates.count(&Plan) && "No VF candidates for plan");
+    return VFCandidates.find(&Plan)->second;
+  }
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ea70036b3477c..4846fed6f8b1f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -57,6 +57,7 @@
 #include "LoopVectorizationPlanner.h"
 #include "VPRecipeBuilder.h"
 #include "VPlan.h"
+#include "VPlanCostModel.h"
 #include "VPlanHCFGBuilder.h"
 #include "VPlanTransforms.h"
 #include "llvm/ADT/APInt.h"
@@ -363,6 +364,11 @@ cl::opt<bool> EnableVPlanNativePath(
              "support for outer loop vectorization."));
 }
 
+cl::opt<bool> CostUsingVPlan("vplan-use-vplan-cost-model", cl::init(false),
+                             cl::Hidden,
+                             cl::desc("Enable VPlan based costing path. To "
+                                      "become the default in the future."));
+
 // This flag enables the stress testing of the VPlan H-CFG construction in the
 // VPlan-native vectorization path. It must be used in conjuction with
 // -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
@@ -1161,6 +1167,8 @@ using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>;
 
 using InstructionVFPair = std::pair<Instruction *, ElementCount>;
 
+using VectorizationCostTy = std::pair<InstructionCost, bool>;
+
 /// LoopVectorizationCostModel - estimates the expected speedups due to
 /// vectorization.
 /// In many cases vectorization is not profitable. This can happen because of
@@ -1169,6 +1177,8 @@ using InstructionVFPair = std::pair<Instruction *, ElementCount>;
 /// TargetTransformInfo to query the different backends for the cost of
 /// different operations.
 class LoopVectorizationCostModel {
+  friend class VPlanCostModel;
+
 public:
   LoopVectorizationCostModel(ScalarEpilogueLowering SEL, Loop *L,
                              PredicatedScalarEvolution &PSE, LoopInfo *LI,
@@ -1192,18 +1202,6 @@ class LoopVectorizationCostModel {
   /// otherwise.
   bool runtimeChecksRequired();
 
-  /// \return The most profitable vectorization factor and the cost of that VF.
-  /// This method checks every VF in \p CandidateVFs.
-  VectorizationFactor
-  selectVectorizationFactor(const ElementCountSet &CandidateVFs);
-
-  /// \return The most profitable vectorization factor and the cost of that VF
-  /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
-  /// epilogue vectorization is not supported for the loop.
-  VectorizationFactor
-  selectEpilogueVectorizationFactor(const ElementCount MaxVF,
-                                    const LoopVectorizationPlanner &LVP);
-
   /// Setup cost-based decisions for user vectorization factor.
   /// \return true if the UserVF is a feasible VF to be chosen.
   bool selectUserVectorizationFactor(ElementCount UserVF) {
@@ -1633,10 +1631,17 @@ class LoopVectorizationCostModel {
     Scalars.clear();
   }
 
-  /// Convenience function that returns the value of vscale_range iff
-  /// vscale_range.min == vscale_range.max or otherwise returns the value
-  /// returned by the corresponding TLI method.
-  std::optional<unsigned> getVScaleForTuning() const;
+  /// Returns the expected execution cost. The unit of the cost does
+  /// not matter because we use the 'cost' units to compare different
+  /// vector widths. The cost that is returned is *not* normalized by
+  /// the factor width. If \p Invalid is not nullptr, this function
+  /// will add a pair(Instruction*, ElementCount) to \p Invalid for
+  /// each instruction that has an Invalid cost for the given VF.
+  VectorizationCostTy
+  expectedCost(ElementCount VF,
+               SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
+
+  bool hasPredStores() const { return NumPredStores > 0; }
 
 private:
   unsigned NumPredStores = 0;
@@ -1668,17 +1673,6 @@ class LoopVectorizationCostModel {
   /// operate on vector values after type legalization in the backend. If this
   /// latter value is false, then all operations will be scalarized (i.e. no
   /// vectorization has actually taken place).
-  using VectorizationCostTy = std::pair<InstructionCost, bool>;
-
-  /// Returns the expected execution cost. The unit of the cost does
-  /// not matter because we use the 'cost' units to compare different
-  /// vector widths. The cost that is returned is *not* normalized by
-  /// the factor width. If \p Invalid is not nullptr, this function
-  /// will add a pair(Instruction*, ElementCount) to \p Invalid for
-  /// each instruction that has an Invalid cost for the given VF.
-  VectorizationCostTy
-  expectedCost(ElementCount VF,
-               SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
 
   /// Returns the execution time cost of an instruction for a given vector
   /// width. Vector width of one means scalar.
@@ -1842,15 +1836,6 @@ class LoopVectorizationCostModel {
         Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
   }
 
-  /// Determines if we have the infrastructure to vectorize the loop and its
-  /// epilogue, assuming the main loop is vectorized by \p VF.
-  bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
-
-  /// Returns true if epilogue vectorization is considered profitable, and
-  /// false otherwise.
-  /// \p VF is the vectorization factor chosen for the original loop.
-  bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
-
 public:
   /// The loop that we evaluate.
   Loop *TheLoop;
@@ -5347,69 +5332,6 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
   return MaxVF;
 }
 
-std::optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const {
-  if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
-    auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange);
-    auto Min = Attr.getVScaleRangeMin();
-    auto Max = Attr.getVScaleRangeMax();
-    if (Max && Min == Max)
-      return Max;
-  }
-
-  return TTI.getVScaleForTuning();
-}
-
-bool LoopVectorizationCostModel::isMoreProfitable(
-    const VectorizationFactor &A, const VectorizationFactor &B) const {
-  InstructionCost CostA = A.Cost;
-  InstructionCost CostB = B.Cost;
-
-  unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
-
-  if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) {
-    // If the trip count is a known (possibly small) constant, the trip count
-    // will be rounded up to an integer number of iterations under
-    // FoldTailByMasking. The total cost in that case will be
-    // VecCost*ceil(TripCount/VF). When not folding the tail, the total
-    // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be
-    // some extra overheads, but for the purpose of comparing the costs of
-    // different VFs we can use this to compare the total loop-body cost
-    // expected after vectorization.
-    auto GetCostForTC = [MaxTripCount, this](unsigned VF,
-                                             InstructionCost VectorCost,
-                                             InstructionCost ScalarCost) {
-      return foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF)
-                                 : VectorCost * (MaxTripCount / VF) +
-                                       ScalarCost * (MaxTripCount % VF);
-    };
-    auto RTCostA = GetCostForTC(A.Width.getFixedValue(), CostA, A.ScalarCost);
-    auto RTCostB = GetCostForTC(B.Width.getFixedValue(), CostB, B.ScalarCost);
-
-    return RTCostA < RTCostB;
-  }
-
-  // Improve estimate for the vector width if it is scalable.
-  unsigned EstimatedWidthA = A.Width.getKnownMinValue();
-  unsigned EstimatedWidthB = B.Width.getKnownMinValue();
-  if (std::optional<unsigned> VScale = getVScaleForTuning()) {
-    if (A.Width.isScalable())
-      EstimatedWidthA *= *VScale;
-    if (B.Width.isScalable())
-      EstimatedWidthB *= *VScale;
-  }
-
-  // Assume vscale may be larger than 1 (or the value being tuned for),
-  // so that scalable vectorization is slightly favorable over fixed-width
-  // vectorization.
-  if (A.Width.isScalable() && !B.Width.isScalable())
-    return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
-
-  // To avoid the need for FP division:
-  //      (CostA / A.Width) < (CostB / B.Width)
-  // <=>  (CostA * B.Width) < (CostB * A.Width)
-  return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA);
-}
-
 static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
                                    OptimizationRemarkEmitter *ORE,
                                    Loop *TheLoop) {
@@ -5474,19 +5396,81 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
   } while (!Tail.empty());
 }
 
-VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
-    const ElementCountSet &VFCandidates) {
-  InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;
-  LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
-  assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
-  assert(VFCandidates.count(ElementCount::getFixed(1)) &&
-         "Expected Scalar VF to be a candidate");
+bool LoopVectorizationPlanner::isMoreProfitable(
+    const VectorizationFactor &A, const VectorizationFactor &B) const {
+  InstructionCost CostA = A.Cost;
+  InstructionCost CostB = B.Cost;
+
+  unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop);
 
-  const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
+  if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) {
+    // If the trip count is a known (possibly small) constant, the trip count
+    // will be rounded up to an integer number of iterations under
+    // FoldTailByMasking. The total cost in that case will be
+    // VecCost*ceil(TripCount/VF). When not folding the tail, the total
+    // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be
+    // some extra overheads, but for the purpose of comparing the costs of
+    // different VFs we can use this to compare the total loop-body cost
+    // expected after vectorization.
+    auto GetCostForTC = [MaxTripCount, this](unsigned VF,
+                                             InstructionCost VectorCost,
+                                             InstructionCost ScalarCost) {
+      return CM.foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF)
+                                    : VectorCost * (MaxTripCount / VF) +
+                                          ScalarCost * (MaxTripCount % VF);
+    };
+    auto RTCostA = GetCostForTC(A.Width.getFixedValue(), CostA, A.ScalarCost);
+    auto RTCostB = GetCostForTC(B.Width.getFixedValue(), CostB, B.ScalarCost);
+
+    return RTCostA < RTCostB;
+  }
+
+  // Improve estimate for the vector width if it is scalable.
+  unsigned EstimatedWidthA = A.Width.getKnownMinValue();
+  unsigned EstimatedWidthB = B.Width.getKnownMinValue();
+  if (std::optional<unsigned> VScale = getVScaleForTuning()) {
+    if (A.Width.isScalable())
+      EstimatedWidthA *= *VScale;
+    if (B.Width.isScalable())
+      EstimatedWidthB *= *VScale;
+  }
+
+  // Assume vscale may be larger than 1 (or the value being tuned for),
+  // so that scalable vectorization is slightly favorable over fixed-width
+  // vectorization.
+  if (A.Width.isScalable() && !B.Width.isScalable())
+    return (CostA * B.Width.getFixedValue()) <= (CostB * EstimatedWidthA);
+
+  // To avoid the need for FP division:
+  //      (CostA / A.Width) < (CostB / B.Width)
+  // <=>  (CostA * B.Width) < (CostB * A.Width)
+  return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA);
+}
+
+std::optional<unsigned> LoopVectorizationPlanner::getVScaleForTuning() const {
+  // A vscale_range attribute whose minimum equals its maximum is used as-is.
+  const Function *F = OrigLoop->getHeader()->getParent();
+  if (F->hasFnAttribute(Attribute::VScaleRange)) {
+    const auto RangeAttr = F->getFnAttribute(Attribute::VScaleRange);
+    const auto MaxVScale = RangeAttr.getVScaleRangeMax();
+    if (MaxVScale && RangeAttr.getVScaleRangeMin() == MaxVScale)
+      return MaxVScale;
+  }
+  // No single known value: defer to the target's tuning hint.
+  return TTI->getVScaleForTuning();
+}
+
+VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
+  assert(!VPlans.empty());
+
+  ElementCount ScalarFactor = ElementCount::getFixed(1);
+  const auto &[ExpectedCost, _] = CM.expectedCost(ScalarFactor);
+  const VectorizationFactor ScalarCost(ScalarFactor, ExpectedCost,
                                        ExpectedCost);
   VectorizationFactor ChosenFactor = ScalarCost;
+  assert(hasPlanWithVF(ScalarFactor) && "Expected Scalar VF to be a candidate");
 
-  bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
+  bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
   if (ForceVectorization && VFCandidates.size() > 1) {
     // Ignore scalar width, because the user explicitly wants vectorization.
     // Initialize cost to max so that VF = 2 is, at least, chosen during cost
@@ -5494,53 +5478,15 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
     ChosenFactor.Cost = InstructionCost::getMax();
   }
 
-  SmallVector<InstructionVFPair> InvalidCosts;
-  for (const auto &i : VFCandidates) {
-    // The cost for scalar VF=1 is already calculated, so ignore it.
-    if (i.isScalar())
-      continue;
-
-    VectorizationCostTy C = expectedCost(i, &InvalidCosts);
-    VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
-
-#ifndef NDEBUG
-    unsigned AssumedMinimumVscale = 1;
-    if (std::optional<unsigned> VScale = getVScaleForTuning())
-      AssumedMinimumVscale = *VScale;
-    unsigned Width =
-        Candidate.Width.isScalable()
-            ? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
-            : Candidate.Width.getFixedValue();
-    LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i
-                      << " costs: " << (Candidate.Cost / Width));
-    if (i.isScalable())
-      LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
-                        << AssumedMinimumVscale << ")");
-    LLVM_DEBUG(dbgs() << ".\n");
-#endif
+  for (auto &Plan : VPlans) {
+    for (const auto &Candidate : getVFCandidatesFor(*Plan)) {
+      // The cost for scalar VF=1 is already calculated, so ignore it.
+      if (Candidate.Width.isScalar())
+        continue;
 
-    if (!C.second && !ForceVectorization) {
-      LLVM_DEBUG(
-          dbgs() << "LV: Not considering vector loop of width " << i
-                 << " because it will not generate any vector instructions.\n");
-      continue;
+      if (isMoreProfitable(Candidate, ChosenFactor))
+        ChosenFactor = Candidate;
     }
-
-    // If profitable add it to ProfitableVF list.
-    if (isMoreProfitable(Candidate, ScalarCost))
-      ProfitableVFs.push_back(Candidate);
-
-    if (isMoreProfitable(Candidate, ChosenFactor))
-      ChosenFactor = Candidate;
-  }
-
-  emitInvalidCostRemarks(InvalidCosts, ORE, TheLoop);
-
-  if (!EnableCondStoresVectorization && NumPredStores) {
-    reportVectorizationFailure("There are conditional stores.",
-        "store that is conditionally executed prevents vectorization",
-        "ConditionalStore", ORE, TheLoop);
-    ChosenFactor = ScalarCost;
   }
 
   LLVM_DEBUG(if (ForceVectorization && !ChosenFactor.Width.isScalar() &&
@@ -5551,11 +5497,11 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
   return ChosenFactor;
 }
 
-bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
+bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
     ElementCount VF) const {
   // Cross iteration phis such as reductions need special handling and are
   // currently unsupported.
-  if (any_of(TheLoop->getHeader()->phis(),
+  if (any_of(OrigLoop->getHeader()->phis(),
              [&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
     return false;
 
@@ -5564,26 +5510,26 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
   for (const auto &Entry : Legal->getInductionVars()) {
     // Look for uses of the value of the induction at the last iteration.
     Value *PostInc =
-        Entry.first->getIncomingValueForBlock(TheLoop->getLoopLatch());
+        Entry.first->getIncomingValueForBlock(OrigLoop->getLoopLatch());
     for (User *U : PostInc->users())
-      if (!TheLoop->contains(cast<Instruction>(U)))
+      if (!OrigLoop->contains(cast<Instruction>(U)))
         return false;
     // Look for uses of penultimate value of the induction.
     for (User *U : Entry.first->users())
-      if (!TheLoop->contains(cast<Instruction>(U)))
+      if (!OrigLoop->contains(cast<Instruction>(U)))
         return false;
   }
 
   // Epilogue vectorization code has not been auditted to ensure it handles
   // non-latch exits properly.  It may be fine, but it needs auditted and
   // tested.
-  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
+  if (OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch())
     return false;
 
   return true;
 }
 
-bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
+bool LoopVectorizationPlanner::isEpilogueVectorizationProfitable(
     const ElementCount VF) const {
   // FIXME: We need a much better cost-model to take different parameters such
   // as register pressure, code size increase and cost of extra branches into
@@ -5591,12 +5537,12 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
   // with vectorization factors larger than a certain value.
 
   // Allow the target to opt out entirely.
-  if (!TTI.preferEpilogueVectorization())
+  if (!TTI->preferEpilogueVectorization())
     return false;
 
   // We also consider epilogue vectorization unprofitable for targets that don't
   // consider interleaving beneficial (eg. MVE).
-  if (TTI.getMaxInterleaveFactor(VF) <= 1)
+  if (TTI->getMaxInterleaveFactor(VF) <= 1)
     return false;
 
   unsigned Multiplier = 1;
@@ -5607,16 +5553,15 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
   return false;
 }
 
-VectorizationFactor
-LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
-    const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) {
+VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
+    const ElementCount MainLoopVF) {
   VectorizationFactor Result = VectorizationFactor::Disabled();
   if (!EnableEpilogueVectorization) {
     LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n");
     return Result;
   }
 
-  if (!isScalarEpilogueAllowed()) {
+  if (!CM.isScalarEpilogueAllowed()) {
     LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because no "
                          "epilogue is allowed.\n");
     return Result;
@@ -5633,7 +5578,7 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
   if (EpilogueVectorizationForceVF > 1) {
     LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n");
     ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
-    if (LVP.hasPlanWithVF(ForcedEC))
+    if (hasPlanWithVF(ForcedEC))
       return {ForcedEC, 0, 0};
     else {
       LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization forced factor is not "
@@ -5642,8 +5587,8 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
     }
   }
 
-  if (TheLoop->getHeader()->getParent()->hasOptSize() ||
-      TheLoop->getHeader()->getParent()->hasMinSize()) {
+  if (OrigLoop->getHeader()->getParent()->hasOptSize() ||
+      OrigLoop->getHeader()->getParent()->hasMinSize()) {
     LLVM_DEBUG(
         dbgs() << "LEV: Epilogue vectorization skipped due to opt for size.\n");
     return Result;
@@ -5665,13 +5610,16 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
       EstimatedRuntimeVF *= *VScale;
   }
 
-  for (auto &NextVF : ProfitableVFs)
-    if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
-          ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) ||
-         ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) &&
-        (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
-        LVP.hasPlanWithVF(NextVF.Width))
-      Result = NextVF;
+  for (auto &VPlan : VPlans) {
+    for (const auto &NextVF : getVFCandidatesFor(*VPlan)) {
+      assert(VPlan->hasVF(NextVF.Width) && "VF not in plan");
+      if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
+            ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) ||
+           ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) &&
+          (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)))
+        Result = NextVF;
+    }
+  }
 
   if (Result != VectorizationFactor::Disabled())
     LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
@@ -6371,8 +6319,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
   return Discount;
 }
 
-LoopVectorizationCostModel::VectorizationCostTy
-LoopVectorizationCostModel::expectedCost(
+VectorizationCostTy LoopVectorizationCostModel::expectedCost(
     ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
   VectorizationCostTy Cost;
 
@@ -6824,7 +6771,7 @@ LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
   return getWideningCost(I, VF);
 }
 
-LoopVectorizationCostModel::VectorizationCostTy
+VectorizationCostTy
 LoopVectorizationCostModel::getInstructionCost(Instruction *I,
                                                ElementCount VF) {
   // If we know that this instruction will remain uniform, check the cost of
@@ -7631,7 +7578,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
     return VectorizationFactor::Disabled();
 
   // Select the optimal vectorization factor.
-  VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates);
+  VectorizationFactor VF = selectVectorizationFactor();
   assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
   if (!hasPlanWithVF(VF.Width)) {
     LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
@@ -8104,6 +8051,7 @@ void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
     VPlans.push_back(buildVPlan(SubRange));
+    VFCandidates[&*VPlans.back()] = SmallVector<VectorizationFactor>();
     VF = SubRange.End;
   }
 }
@@ -8708,6 +8656,20 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
   return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
 }
 
+Type *VPlanCostModel::truncateToMinimalBitwidth(Type *ValTy,
+                                                Instruction *I) const {
+  const auto &MinBWs = CM.getMinimalBitwidths(); // Avoid copying the map.
+  if (auto It = MinBWs.find(I); It != MinBWs.end())
+    ValTy = IntegerType::get(ValTy->getContext(), It->second);
+  return ValTy;
+}
+
+InstructionCost VPlanCostModel::getLegacyInstructionCost(Instruction *I,
+                                                         ElementCount VF) {
+  // Forward to CM and keep only the cost half of the (cost, bool) pair.
+  return CM.getInstructionCost(I, VF).first;
+}
+
 void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
                                                         ElementCount MaxVF) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
@@ -8720,13 +8682,68 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
   auto &ConditionalAssumes = Legal->getConditionalAssumes();
   DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());
 
+  InstructionCost ScalarCost = CM.expectedCost(ElementCount::getFixed(1)).first;
+  LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ScalarCost << ".\n");
+
+  bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
+  SmallVector<InstructionVFPair> InvalidCosts;
   auto MaxVFTimes2 = MaxVF * 2;
   for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
     VFRange SubRange = {VF, MaxVFTimes2};
-    if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions))
-      VPlans.push_back(std::move(*Plan));
+    auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions);
+    if (!Plan) {
+      VF = SubRange.End;
+      continue;
+    }
+    VPlans.emplace_back(std::move(*Plan));
     VF = SubRange.End;
   }
+
+  VPlanCostModel VPCM(*TTI, PSE.getSE()->getContext(), CM);
+  for (const VPlanPtr &Plan : VPlans) {
+    SmallVector<VectorizationFactor> Costs;
+    for (ElementCount CostVF : Plan->getVFs()) {
+      VectorizationCostTy C;
+      if (CostUsingVPlan) {
+        C.first = VPCM.expectedCost(*Plan, CostVF, C.second);
+      } else
+        C = CM.expectedCost(CostVF, &InvalidCosts);
+      auto [VecCost, IsVec] = C;
+#ifndef NDEBUG
+      unsigned AssumedMinimumVscale = 1;
+      if (std::optional<unsigned> VScale = getVScaleForTuning())
+        AssumedMinimumVscale = *VScale;
+      unsigned Width = CostVF.isScalable()
+                           ? CostVF.getKnownMinValue() * AssumedMinimumVscale
+                           : CostVF.getFixedValue();
+      LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << CostVF
+                        << " costs: " << (VecCost / Width));
+      if (CostVF.isScalable())
+        LLVM_DEBUG(dbgs() << " (assuming a minimum vscale of "
+                          << AssumedMinimumVscale << ")");
+      LLVM_DEBUG(dbgs() << ".\n");
+#endif
+      if (CostVF.isVector() && !IsVec && !ForceVectorization) {
+        LLVM_DEBUG(
+            dbgs()
+            << "LV: Not considering vector loop of width " << CostVF
+            << " because it will not generate any vector instructions.\n");
+        continue;
+      }
+
+      Costs.emplace_back(VectorizationFactor(CostVF, VecCost, ScalarCost));
+    }
+    VFCandidates[&*Plan] = Costs;
+  }
+  emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop);
+
+  if (!EnableCondStoresVectorization && CM.hasPredStores()) {
+    reportVectorizationFailure(
+        "There are conditional stores.",
+        "store that is conditionally executed prevents vectorization",
+        "ConditionalStore", ORE, OrigLoop);
+    VPlans.clear();
+  }
 }
 
 // Add the necessary canonical IV and branch recipes required to control the
@@ -10268,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     bool ForceVectorization =
         Hints.getForce() == LoopVectorizeHints::FK_Enabled;
     if (!ForceVectorization &&
-        !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L,
+        !areRuntimeChecksProfitable(Checks, VF, LVP.getVScaleForTuning(), L,
                                     *PSE.getSE())) {
       ORE->emit([&]() {
         return OptimizationRemarkAnalysisAliasing(
@@ -10390,7 +10407,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
 
       // Consider vectorizing the epilogue too if it's profitable.
       VectorizationFactor EpilogueVF =
-          CM.selectEpilogueVectorizationFactor(VF.Width, LVP);
+          LVP.selectEpilogueVectorizationFactor(VF.Width);
       if (EpilogueVF.Width.isVector()) {
 
         // The first pass vectorizes the main loop and creates a scalar epilogue
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 45fc5041f9e55..b929faab011c4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -756,6 +756,11 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
     return cast<Instruction>(getVPSingleValue()->getUnderlyingValue());
   }
 
+  bool hasUnderlyingInstr() const {
+    return getNumDefinedValues() == 1 &&
+           getVPSingleValue()->getUnderlyingValue() != nullptr;
+  }
+
   /// Method to support type inquiry through isa, cast, and dyn_cast.
   static inline bool classof(const VPDef *D) {
     // All VPDefs are also VPRecipeBases.
@@ -2320,6 +2325,9 @@ class VPlan {
     UFs.insert(UF);
   }
 
+  /// Return the VFs represented in the plan.
+  ArrayRef<ElementCount> getVFs() const { return VFs.getArrayRef(); }
+
   /// Return a string with the name of the plan and the applicable VFs and UFs.
   std::string getName() const;
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp b/llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp
new file mode 100644
index 0000000000000..7384300cc7d50
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanCostModel.cpp
@@ -0,0 +1,284 @@
+//===- VPlanCostModel.cpp - VPlan-based Vectorizer Cost Model -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// VPlan-based cost model
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/TypeSwitch.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+
+#include "VPlan.h"
+#include "VPlanCFG.h"
+#include "VPlanCostModel.h"
+#include "VPlanValue.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "vplan-cost-model"
+
+namespace llvm {
+InstructionCost VPlanCostModel::expectedCost(const VPlan &Plan, ElementCount VF,
+                                             bool &IsVec) {
+  // Accumulate the cost of every block reached from the plan's entry.
+  InstructionCost PlanCost = 0;
+  for (const VPBlockBase *VPB : vp_depth_first_deep(Plan.getEntry()))
+    PlanCost += getCost(VPB, VF, IsVec);
+  return PlanCost;
+}
+
+InstructionCost VPlanCostModel::getCost(const VPBlockBase *Block,
+                                        ElementCount VF, bool &IsVec) {
+  // Only basic blocks are costed here; any other kind of block contributes 0.
+  const auto *VPBB = dyn_cast<VPBasicBlock>(Block);
+  if (!VPBB)
+    return 0;
+  InstructionCost BlockCost = 0;
+  for (const VPRecipeBase &R : *VPBB)
+    BlockCost += getCost(&R, VF, IsVec);
+  return BlockCost;
+}
+
+// Cost a single recipe at \p VF. VPInstructions and widened memory recipes are
+// costed here through TTI; any other recipe costs 0 unless it has an underlying
+// instruction, in which case getLegacyInstructionCost is used. \p IsVec is set
+// whenever a recipe is costed as a vector operation.
+InstructionCost VPlanCostModel::getCost(const VPRecipeBase *Recipe,
+                                        ElementCount VF, bool &IsVec) {
+  auto *ScCondTy = Type::getInt1Ty(Context);
+  auto *VecCondTy = VectorType::get(ScCondTy, VF);
+  InstructionCost Cost =
+      TypeSwitch<const VPRecipeBase *, InstructionCost>(Recipe)
+          .Case<VPInstruction>([&](const VPInstruction *VPI)
+                                   -> InstructionCost {
+            unsigned Opcode = VPI->getOpcode();
+            if (Instruction::isBinaryOp(Opcode)) {
+              // Operands: A, B. Costed as a vector arithmetic op of Opcode.
+              IsVec |= true;
+              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
+              return TTI.getArithmeticInstrCost(Opcode, VectorTy, CostKind);
+            }
+            switch (Opcode) {
+            case VPInstruction::Not: { // Operands: A; costed as a vector xor
+              IsVec |= true;
+              Type *VectorTy = VectorType::get(getElementType(VPI, 0), VF);
+              return TTI.getArithmeticInstrCost(Instruction::Xor, VectorTy,
+                                                CostKind);
+            }
+            case VPInstruction::ICmpULE: { // Operands: IV, TripCount
+              IsVec |= true;
+              Type *VectorTy = VectorType::get(getElementType(VPI, 0), VF);
+              return TTI.getCmpSelInstrCost(Instruction::ICmp, VectorTy,
+                                            VecCondTy, CmpInst::ICMP_ULE,
+                                            CostKind);
+            }
+            case Instruction::Select: {
+              // Operands: Cond, Op1, Op2
+              IsVec |= true;
+              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
+              return TTI.getCmpSelInstrCost(
+                  Instruction::Select, VectorTy, VecCondTy,
+                  CmpInst::BAD_ICMP_PREDICATE, CostKind);
+            }
+            case VPInstruction::ActiveLaneMask: {
+              // Operands: IV, TripCount. Costed as get_active_lane_mask.
+              IsVec |= true;
+              Type *OpTy = Type::getIntNTy(
+                  Context, getElementType(VPI, 0)->getScalarSizeInBits());
+              IntrinsicCostAttributes ICA(Intrinsic::get_active_lane_mask,
+                                          VecCondTy, {OpTy, OpTy});
+              return TTI.getIntrinsicInstrCost(ICA, CostKind);
+            }
+            case VPInstruction::FirstOrderRecurrenceSplice: {
+              // Operands: FOR, FOR.backedge. Costed as an SK_Splice shuffle.
+              IsVec |= true;
+              Type *VectorTy = VectorType::get(getReturnElementType(VPI), VF);
+              SmallVector<int> Mask(VF.getKnownMinValue());
+              std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
+              return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
+                                        cast<VectorType>(VectorTy), Mask,
+                                        CostKind, VF.getKnownMinValue() - 1);
+            }
+            case VPInstruction::CalculateTripCountMinusVF: {
+              // Operands: TripCount. Costed as scalar sub + icmp ugt + select.
+              Type *ScalarTy = getReturnElementType(VPI);
+              return TTI.getArithmeticInstrCost(Instruction::Sub, ScalarTy,
+                                                CostKind) +
+                     TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
+                                            ScCondTy, CmpInst::ICMP_UGT,
+                                            CostKind) +
+                     TTI.getCmpSelInstrCost(
+                         Instruction::Select, ScalarTy, ScCondTy,
+                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
+            }
+            case VPInstruction::CanonicalIVIncrement:
+            case VPInstruction::CanonicalIVIncrementNUW:
+            case VPInstruction::CanonicalIVIncrementForPart:
+            case VPInstruction::CanonicalIVIncrementForPartNUW: {
+              // Operands: StartV (ForPart forms) or IVPhi, CanonicalIVIncrement
+              Type *ScalarTy = getReturnElementType(VPI);
+              return TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy,
+                                                CostKind);
+            }
+            case VPInstruction::BranchOnCond: // Operands: Cond
+            case VPInstruction::BranchOnCount: {
+              // Operands: IV, TripCount. Costed as scalar icmp eq + br.
+              Type *ScalarTy = getElementType(VPI, 0);
+              return TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
+                                            ScCondTy, CmpInst::ICMP_EQ,
+                                            CostKind) +
+                     TTI.getCFInstrCost(Instruction::Br, CostKind);
+            }
+            default:
+              llvm_unreachable("Unsupported opcode for VPInstruction");
+            } // end of switch (Opcode)
+          })
+          .Case<VPWidenMemoryInstructionRecipe>(
+              [&](const VPWidenMemoryInstructionRecipe *VPWMIR) {
+                IsVec |= true;
+                return getMemoryOpCost(VPWMIR, VF);
+              })
+          .Default([&](const VPRecipeBase *R) -> InstructionCost {
+            if (!R->hasUnderlyingInstr()) {
+              LLVM_DEBUG(
+                  dbgs() << "VPlanCM: unsupported recipe ";
+                  VPSlotTracker SlotTracker((Recipe->getParent())
+                                                ? Recipe->getParent()->getPlan()
+                                                : nullptr);
+                  Recipe->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
+              return 0; // no model and no underlying instruction to cost
+            }
+            Instruction *I = const_cast<Instruction *>(R->getUnderlyingInstr());
+            return getLegacyInstructionCost(I, VF);
+          });
+
+  LLVM_DEBUG(dbgs() << "VPlanCM: cost " << Cost << " for VF " << VF
+                    << " for VPInstruction: ";
+             VPSlotTracker SlotTracker((Recipe->getParent())
+                                           ? Recipe->getParent()->getPlan()
+                                           : nullptr);
+             Recipe->print(dbgs(), Twine(), SlotTracker); dbgs() << '\n');
+  return Cost;
+}
+
+InstructionCost VPlanCostModel::getMemoryOpCost(const Instruction *I, Type *Ty,
+                                                bool IsConsecutive,
+                                                bool IsMasked, bool IsReverse) {
+  const unsigned Opcode = I->getOpcode();
+  const Align Alignment = getLoadStoreAlignment(const_cast<Instruction *>(I));
+
+  // Non-consecutive accesses: address computation plus a gather/scatter.
+  if (!IsConsecutive) {
+    const Value *Ptr = getLoadStorePointerOperand(I);
+    return TTI.getAddressComputationCost(Ty) +
+           TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
+                                      CostKind, I);
+  }
+
+  // Consecutive accesses: one (optionally masked) memory operation on Ty.
+  const unsigned AS = getLoadStoreAddressSpace(const_cast<Instruction *>(I));
+  InstructionCost Cost =
+      IsMasked ? TTI.getMaskedMemoryOpCost(Opcode, Ty, Alignment, AS, CostKind)
+               : TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, CostKind,
+                                     TTI::getOperandInfo(I->getOperand(0)), I);
+  // A reversed access additionally pays for a reverse shuffle of the vector.
+  if (IsReverse)
+    Cost += TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
+                               cast<VectorType>(Ty), std::nullopt, CostKind, 0);
+  return Cost;
+}
+
+InstructionCost
+VPlanCostModel::getMemoryOpCost(const VPWidenMemoryInstructionRecipe *VPWMIR,
+                                ElementCount VF) {
+  // Cost the recipe's ingredient as an access of VF elements of its element
+  // type; the recipe supplies the consecutive/masked/reverse properties.
+  Type *WideTy = VectorType::get(getReturnElementType(VPWMIR), VF);
+  const bool HasMask = VPWMIR->getMask() != nullptr;
+  return getMemoryOpCost(&VPWMIR->getIngredient(), WideTy,
+                         VPWMIR->isConsecutive(), HasMask, VPWMIR->isReverse());
+}
+
+// Return the element type of operand \p N of \p Recipe. Only element types can
+// be queried because the VF is not carried in the VPlan.
+Type *VPlanCostModel::getElementType(const VPRecipeBase *Recipe,
+                                     unsigned N) const {
+  VPValue *Op = Recipe->getOperand(N);
+  if (Value *IRVal = Op->getUnderlyingValue()) {
+    // Instructions go through truncateToMinimalBitwidth; any other IR value
+    // keeps its own type.
+    if (auto *Inst = dyn_cast<Instruction>(IRVal))
+      return truncateToMinimalBitwidth(IRVal->getType(), Inst);
+    return IRVal->getType();
+  }
+  // No IR value behind the operand: use the type of its defining recipe.
+  assert(Op->hasDefiningRecipe() &&
+         "VPValue has no live-in and defining recipe");
+  return getReturnElementType(Op->getDefiningRecipe());
+}
+
+Type *VPlanCostModel::getReturnElementType(const VPRecipeBase *Recipe) const {
+  // Widened loads/stores: the ingredient's access type, passed through
+  // truncateToMinimalBitwidth.
+  if (const auto *VPWMIR = dyn_cast<VPWidenMemoryInstructionRecipe>(Recipe)) {
+    Instruction *Ingredient = &VPWMIR->getIngredient();
+    return truncateToMinimalBitwidth(getLoadStoreType(Ingredient), Ingredient);
+  }
+
+  // VPInstruction is the only other recipe kind handled here.
+  const auto *VPI = dyn_cast<VPInstruction>(Recipe);
+  if (!VPI)
+    llvm_unreachable("Unsupported VPRecipe");
+
+  const unsigned Opcode = VPI->getOpcode();
+  // Binary operators (operands: A, B) take the type of their first operand.
+  if (Instruction::isBinaryOp(Opcode))
+    return getElementType(VPI, 0);
+
+  switch (Opcode) {
+  // Opcodes that produce an i1 element.
+  case VPInstruction::Not:
+    // Operands: A
+  case VPInstruction::ICmpULE:
+    // Operands: IV, TripCount
+  case VPInstruction::ActiveLaneMask:
+    // Operands: IV, TripCount
+    return Type::getInt1Ty(Context);
+  // A select takes the type of Op1.
+  case Instruction::Select:
+    // Operands: Cond, Op1, Op2
+    return getElementType(VPI, 1);
+  // Opcodes that take the type of their first operand.
+  case VPInstruction::FirstOrderRecurrenceSplice:
+    // Operands: FOR, FOR.backedge
+  case VPInstruction::CalculateTripCountMinusVF:
+    // Operands: TripCount
+  case VPInstruction::CanonicalIVIncrement:
+  case VPInstruction::CanonicalIVIncrementNUW:
+    // Operands: IVPhi, CanonicalIVIncrement
+  case VPInstruction::CanonicalIVIncrementForPart:
+  case VPInstruction::CanonicalIVIncrementForPartNUW:
+    // Operands: StartV
+    return getElementType(VPI, 0);
+  // Branches produce no value, so there is no type to return for them.
+  case VPInstruction::BranchOnCond:
+    // Operands: Cond
+  case VPInstruction::BranchOnCount:
+    // Operands: IV, TripCount
+    llvm_unreachable("Operation doesn't have return type");
+  default:
+    llvm_unreachable("Unsupported opcode for VPInstruction");
+  }
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/VPlanCostModel.h b/llvm/lib/Transforms/Vectorize/VPlanCostModel.h
new file mode 100644
index 0000000000000..a9b47e1ff0c4a
--- /dev/null
+++ b/llvm/lib/Transforms/Vectorize/VPlanCostModel.h
@@ -0,0 +1,71 @@
+//===- VPlanCostModel.cpp - Vectorizer Cost Model ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// VPlan-based cost model
+///
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Instruction.h"
+
+#include "VPlan.h"
+#include "VPlanValue.h"
+
+namespace llvm {
+class Type;
+class TargetTransformInfo;
+class LoopVectorizationCostModel;
+
+class VPlanCostModel {
+public:
+  explicit VPlanCostModel(const TargetTransformInfo &TTI,
+                          llvm::LLVMContext &Context,
+                          LoopVectorizationCostModel &CM)
+      : TTI(TTI), Context(Context), CM(CM) {}
+
+  /// Return cost of the VPlan for a given \p VF
+  InstructionCost expectedCost(const VPlan &Plan, ElementCount VF, bool &IsVec);
+
+private:
+  /// Return individual cost of the \p VPBasicBlock for a given \p VF
+  InstructionCost getCost(const VPBlockBase *Block, ElementCount VF,
+                          bool &IsVec);
+
+  /// Return individual cost of the \p Recipe for a given \p VF
+  InstructionCost getCost(const VPRecipeBase *Recipe, ElementCount VF,
+                          bool &IsVec);
+
+  /// Return individual cost of the \p Recipe for a given \p VF
+  InstructionCost getLegacyInstructionCost(Instruction *I, ElementCount VF);
+
+  InstructionCost getMemoryOpCost(const VPWidenMemoryInstructionRecipe *VPWMIR,
+                                  ElementCount VF);
+
+  /// Return cost of the individual memory operation of a instruction \p I of a
+  /// given type \p Ty
+  InstructionCost getMemoryOpCost(const Instruction *I, Type *Ty,
+                                  bool IsConsecutive, bool IsMasked,
+                                  bool IsReverse);
+
+  Type *getElementType(const VPRecipeBase *Recipe, unsigned N) const;
+  Type *getReturnElementType(const VPRecipeBase *Recipe) const;
+  Type *truncateToMinimalBitwidth(Type *ValTy, Instruction *I) const;
+
+  /// Vector target information.
+  const TargetTransformInfo &TTI;
+
+  LLVMContext &Context;
+
+  /// FIXME: Legacy model is only here during our transition to the vplan-based
+  /// model
+  LoopVectorizationCostModel &CM;
+
+  /// Use same cost kind in the cost model
+  const TargetTransformInfo::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+};
+} // namespace llvm