-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[RISCV][TTI] Recognize CONCAT_VECTORS if a shufflevector mask is multiple insert subvector. #110457
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ab1faf5
22cb3db
5c43d52
66e4c34
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -343,6 +343,49 @@ RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) { | |||||
/*AddressSpace=*/0, CostKind); | ||||||
} | ||||||
|
||||||
InstructionCost | ||||||
RISCVTTIImpl::isMultipleInsertSubvector(VectorType *Tp, ArrayRef<int> Mask, | ||||||
TTI::TargetCostKind CostKind) { | ||||||
if (!isa<FixedVectorType>(Tp)) | ||||||
return InstructionCost::getInvalid(); | ||||||
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp); | ||||||
if (LT.second.getScalarSizeInBits() == 1) | ||||||
return InstructionCost::getInvalid(); | ||||||
// Try to guess SubTp. | ||||||
for (unsigned SubVecSize = 1, E = Mask.size(); SubVecSize < E; | ||||||
SubVecSize <<= 1) { | ||||||
if (E % SubVecSize != 0) | ||||||
continue; | ||||||
SmallVector<int> RepeatedPattern(createSequentialMask(0, SubVecSize, 0)); | ||||||
bool Skip = false; | ||||||
for (unsigned I = 0; I != E; I += SubVecSize) | ||||||
if (!Mask.slice(I, SubVecSize).equals(RepeatedPattern)) { | ||||||
Skip = true; | ||||||
break; | ||||||
} | ||||||
if (Skip) | ||||||
continue; | ||||||
InstructionCost Cost = 0; | ||||||
unsigned NumSlides = Log2_32(E / SubVecSize); | ||||||
// The cost of extraction from a subvector is 0 if the index is 0. | ||||||
for (unsigned I = 0; I != NumSlides; ++I) { | ||||||
unsigned InsertIndex = SubVecSize * (1 << I); | ||||||
FixedVectorType *SubTp = FixedVectorType::get( | ||||||
cast<FixedVectorType>(Tp)->getElementType(), InsertIndex); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should this be
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The result of the previous iteration becomes the subvector for the next iteration, because we are trying to concatenate the same subvector. |
||||||
FixedVectorType *DesTp = | ||||||
FixedVectorType::getDoubleElementsVectorType(SubTp); | ||||||
std::pair<InstructionCost, MVT> DesLT = getTypeLegalizationCost(DesTp); | ||||||
// Add the cost of whole vector register move because the destination | ||||||
// vector register group for vslideup cannot overlap the source. | ||||||
Cost += DesLT.first * TLI->getLMULCost(DesLT.second); | ||||||
lukel97 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
Cost += getShuffleCost(TTI::SK_InsertSubvector, DesTp, {}, CostKind, | ||||||
InsertIndex, SubTp); | ||||||
} | ||||||
return Cost; | ||||||
} | ||||||
return InstructionCost::getInvalid(); | ||||||
} | ||||||
|
||||||
static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST, | ||||||
LLVMContext &C) { | ||||||
assert((DataVT.getScalarSizeInBits() != 8 || | ||||||
|
@@ -394,6 +437,10 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, | |||||
LT.second, CostKind); | ||||||
} | ||||||
} | ||||||
if (InstructionCost Cost = | ||||||
isMultipleInsertSubvector(Tp, Mask, CostKind); | ||||||
Cost.isValid()) | ||||||
return Cost; | ||||||
} | ||||||
// vrgather + cost of generating the mask constant. | ||||||
// We model this for an unknown mask with a single vrgather. | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py | ||
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -S -mtriple=riscv64 -mattr=+v | FileCheck %s | ||
|
||
define void @test() { | ||
; CHECK-LABEL: 'test' | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> | ||
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void | ||
; | ||
entry: | ||
%0 = shufflevector <8 x float> poison, <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
%1 = shufflevector <4 x i16> poison, <4 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | ||
%2 = shufflevector <4 x float> poison, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> | ||
%3 = shufflevector <2 x i1> poison, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can work out the subvector size in one pass over the mask rather than with a nested loop: if we iterate over each index and mark when it goes back to zero, that position gives the subvector size, and we can then check that the rest of the mask satisfies
`mask[i] == i % subvecsize`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you mean "if we iterate over each index and mark when it goes back to zero"?