llvm · HanKuanChen · Oct 5, 2024 · Sep 27, 2024 · Sep 27, 2024 · Sep 30, 2024
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -343,6 +343,49 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty,  TTI::TargetCostKind CostKind) {
                              /*AddressSpace=*/0, CostKind);
 }
 
+InstructionCost
+RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+                                        TTI::TargetCostKind CostKind) {
+  if (!isa<FixedVectorType>(Tp))
+    return InstructionCost::getInvalid();
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+  if (LT.second.getScalarSizeInBits() == 1)
+    return InstructionCost::getInvalid();
+  // Try to guess SubTp.
+  for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E;
+       SubVecSize <<= 1) {
+    if (E % SubVecSize != 0)
+      continue;
+    SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0));
+    bool Skip = false;
+    for (unsigned I = 0; I != E; I += SubVecSize)
+      if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) {
+        Skip = true;
+        break;
+      }
+    if (Skip)
+      continue;
+    InstructionCost Cost = 0;
+    unsigned NumSlides = Log2_32(E / SubVecSize);
+    // The cost of extraction from a subvector is 0 if the index is 0.
+    for (unsigned I = 0; I != NumSlides; ++I) {
+      unsigned InsertIndex = SubVecSize * (1 << I);
+      FixedVectorType *SubTp = FixedVectorType::get(
+          cast<FixedVectorType>(Tp)->getElementType(), InsertIndex);
-          cast<FixedVectorType>(Tp)->getElementType(), InsertIndex);
+          cast<FixedVectorType>(Tp)->getElementType(), SubVecSize);
-          cast<FixedVectorType>(Tp)->getElementType(), InsertIndex);
+          cast<FixedVectorType>(Tp)->getElementType(), SubVecSize);
+      FixedVectorType *DesTp =
+          FixedVectorType::getDoubleElementsVectorType(SubTp);
+      std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp);
+      // Add the cost of whole vector register move because the destination
+      // vector register group for vslideup cannot overlap the source.
+      Cost += DesLT.first * TLI->getLMULCost(DesLT.second);
+      Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind,
+                             InsertIndex, SubTp);
+    }
+    return Cost;
+  }
+  return InstructionCost::getInvalid();
+}
+
 static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
                                         LLVMContext &C) {
   assert((DataVT.getScalarSizeInBits() != 8 ||
@@ -394,6 +437,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                                         LT.second, CostKind);
           }
         }
+        if (InstructionCost Cost =
+                isMultipleInsertSubvector(Tp, Mask, CostKind);
+            Cost.isValid())
+          return Cost;
       }
       // vrgather + cost of generating the mask constant.
       // We model this for an unknown mask with a single vrgather.

diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,6 +55,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   /// type.
   InstructionCost getConstantPoolLoadCost(Type *Ty,
                                           TTI::TargetCostKind CostKind);
+
+  /// Return the cost if a shufflevector can be consist of multiple vslideup.
+  /// Otherwise, return InstructionCost::getInvalid().
+  InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+                                            TTI::TargetCostKind CostKind);
+
 public:
   explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
       : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),

diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+  %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+  ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
@@ -8,7 +8,7 @@
 ; YAML-NEXT:  Function:        test
 ; YAML-NEXT:  Args:
 ; YAML-NEXT:  - String:          'Stores SLP vectorized with cost '
-; YAML-NEXT:  - Cost:            '2'
+; YAML-NEXT:  - Cost:            '0'
 ; YAML-NEXT:  - String:          ' and with tree size '
 ; YAML-NEXT:  - TreeSize:        '7'
 

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -8,7 +8,7 @@
 ; YAML: Function:        test1
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '6'
+; YAML:   - Cost:            '4'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '5'
 
@@ -47,7 +47,7 @@ declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
 ; YAML: Function:        test2
 ; YAML: Args:
 ; YAML:   - String:          'Stores SLP vectorized with cost '
-; YAML:   - Cost:            '16'
+; YAML:   - Cost:            '12'
 ; YAML:   - String:          ' and with tree size '
 ; YAML:   - TreeSize:        '5'