[RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector.#110457
Conversation
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-risc-v Author: Han-Kuan Chen (HanKuanChen) ChangesFull diff: https://github.com/llvm/llvm-project/pull/110457.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 768df71715fa63..e8a5477fbbc599 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -343,6 +343,40 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
/*AddressSpace=*/0, CostKind);
}
+InstructionCost
+RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind) {
+ if (!isa<FixedVectorType>(Tp))
+ return InstructionCost::getInvalid();
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+ if (LT.second.getScalarSizeInBits() == 1)
+ return InstructionCost::getInvalid();
+ // Try to guess SubTp.
+ for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E;
+ SubVecSize <<= 1) {
+ if (E % SubVecSize != 0)
+ continue;
+ SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0));
+ bool Skip = false;
+ for (unsigned I = 0; I != E; I += SubVecSize)
+ if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) {
+ Skip = true;
+ break;
+ }
+ if (Skip)
+ continue;
+ InstructionCost Cost = 0;
+ FixedVectorType *SubTp = FixedVectorType::get(
+ cast<FixedVectorType>(Tp)->getElementType(), SubVecSize);
+ // The cost of extraction from a subvector is 0 if the index is 0.
+ for (unsigned I = 0; I != E; I += SubVecSize)
+ Cost +=
+ getShuffleCost(TTI::SK_InsertSubvector, Tp, {}, CostKind, I, SubTp);
+ return Cost;
+ }
+ return InstructionCost::getInvalid();
+}
+
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
LLVMContext &C) {
assert((DataVT.getScalarSizeInBits() != 8 ||
@@ -394,6 +428,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LT.second, CostKind);
}
}
+ if (InstructionCost Cost =
+ isMultipleInsertSubvector(Tp, Mask, CostKind);
+ Cost.isValid())
+ return Cost;
}
// vrgather + cost of generating the mask constant.
// We model this for an unknown mask with a single vrgather.
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index f16c4fc0eed023..599c9a079874b8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -55,6 +55,12 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// type.
InstructionCost getConstantPoolLoadCost(Type *Ty,
TTI::TargetCostKind CostKind);
+
+ /// Return the cost if a shufflevector can be consist of multiple vslideup.
+ /// Otherwise, return InstructionCost::getInvalid().
+ InstructionCost isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask,
+ TTI::TargetCostKind CostKind);
+
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)),
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
new file mode 100644
index 00000000000000..a0c2413c8655a6
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-insert-subvector.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+entry:
+ %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ ret void
+}
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
index bb806be15c71ca..23a9a654c96f9e 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/remarks-insert-into-small-vector.ll
@@ -8,7 +8,7 @@
; YAML-NEXT: Function: test
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'Stores SLP vectorized with cost '
-; YAML-NEXT: - Cost: '2'
+; YAML-NEXT: - Cost: '0'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '7'
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
index 887f59bbda94d6..ed1cb6a77f5857 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec-getGatherCost.ll
@@ -8,7 +8,7 @@
; YAML: Function: test
; YAML: Args:
; YAML: - String: 'Stores SLP vectorized with cost '
-; YAML: - Cost: '6'
+; YAML: - Cost: '4'
; YAML: - String: ' and with tree size '
; YAML: - TreeSize: '5'
|
| define void @test() { | ||
| ; CHECK-LABEL: 'test' | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
| ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> |
There was a problem hiding this comment.
Is this still overestimating the cost? Putting this into llc -mattr=+v:
define <16 x i16> @f(<4 x i16> %a, <4 x i16> %b) {
%c = shufflevector <4 x i16> %a, <4 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i16> %c
}I'm counting a total of 3 LMUL ops, so I think this should be costed as 3.
f:
vmv1r.v v10, v8
vsetivli zero, 8, e16, m1, ta, ma
vslideup.vi v10, v8, 4
vmv2r.v v8, v10
vsetivli zero, 16, e16, m2, ta, ma
vslideup.vi v8, v10, 8
rete79f8d1 to
66e4c34
Compare
|
ping |
…iple insert subvector. (llvm#110457)
* commit 'FETCH_HEAD': [clang][bytecode] Handle UETT_OpenMPRequiredSimdAlign (llvm#111259) [mlir][polynomial] Add and verify constraints of coefficientModulus for ringAttr (llvm#111016) [clang][bytecode] Save a per-Block IsWeak bit (llvm#111248) [analyzer] Fix wrong `builtin_*_overflow` return type (llvm#111253) [SelectOpt] Don't convert constant selects to branches. (llvm#110858) [InstCombine] Update and-or-icmps.ll after 574266c. NFC [Instcombine] Test for more gep canonicalization [NFC][TableGen] Change `CodeGenIntrinsics` to use const references (llvm#111219) Add warning message to `session save` when transcript isn't saved. (llvm#109020) [RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. (llvm#110457) Revert "[InstCombine] Folding `(icmp eq/ne (and X, -P2), INT_MIN)`" (llvm#111236) [NFC][lsan] Add SuspendAllThreads traces [lsan] Add `thread_suspend_fail` flag Signed-off-by: kyvangka1610 <kyvangka2002@gmail.com>
lukel97
left a comment
There was a problem hiding this comment.
Hi, even though the comments were marked as resolved PRs usually still need explicit approval before being merged.
Would it be possible to revert this and open another PR so it can go through another round of review? Or was this reviewed internally within SiFive?
Unfortunately I don't think you can reopen merged PRs on GitHub
| for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E; | ||
| SubVecSize <<= 1) { | ||
| if (E % SubVecSize != 0) | ||
| continue; | ||
| SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0)); | ||
| bool Skip = false; | ||
| for (unsigned I = 0; I != E; I += SubVecSize) | ||
| if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) { | ||
| Skip = true; | ||
| break; | ||
| } | ||
| if (Skip) | ||
| continue; |
There was a problem hiding this comment.
I think we can work out the subvector size in one iteration over the mask rather than with a nested loop, if we iterate over each index and mark when it goes back to zero, then check the rest of the mask is mask[i] == i % subvecsize
There was a problem hiding this comment.
What do you mean "if we iterate over each index and mark when it goes back to zero"?
| for (unsigned I = 0; I != NumSlides; ++I) { | ||
| unsigned InsertIndex = SubVecSize * (1 << I); | ||
| FixedVectorType *SubTp = FixedVectorType::get( | ||
| cast<FixedVectorType>(Tp)->getElementType(), InsertIndex); |
There was a problem hiding this comment.
Should this be
| cast<FixedVectorType>(Tp)->getElementType(), InsertIndex); | |
| cast<FixedVectorType>(Tp)->getElementType(), SubVecSize); |
There was a problem hiding this comment.
The previous iteration result is going to be the next iteration subvector because we are trying to concat the same subvector.
If we use SubVecSize, that means we are concat multiple different subvector.
I have reverted this commit as it violates project review policy. Feel free to move to a new review, but you need to have approval before landing. |
…iple insert subvector. (llvm#110457)
No description provided.