-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[VPlan] Add transformation to narrow interleave groups. #106441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
36c68a2
725a1e7
8252b0c
00471e2
61279d1
885984d
d4cd3aa
9d67dcd
3637cfb
f2dcf3d
4693c6b
09f2ee5
ee6b265
1937f99
8edad6b
f08c313
9312264
8aa6cd6
b5ada93
95cf546
1110761
521d8fc
3494339
ac323a7
3599a52
7755ba9
3fd2b8d
b9b4fc2
89d4f13
e127e33
4742f67
b45c3aa
86ac70a
315de55
b79c14c
0226cb0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -2236,6 +2236,36 @@ void VPlanTransforms::materializeBroadcasts(VPlan &Plan) { | |||||
} | ||||||
} | ||||||
|
||||||
/// Returns true if \p V is VPWidenLoadRecipe or VPInterleaveRecipe that can be | ||||||
/// converted to a narrower recipe. \p V is used by a wide recipe \p WideMember | ||||||
/// that feeds a store interleave group at index \p Idx, \p WideMember0 is the | ||||||
/// recipe feeding the same interleave group at index 0. A VPWidenLoadRecipe can | ||||||
/// be narrowed to an index-independent load if it feeds all wide ops at all | ||||||
/// indices (checked via the operands of the wide recipe at lane0, \p | ||||||
/// WideMember0). A VPInterleaveRecipe can be narrowed to a wide load, if \p V | ||||||
/// is defined at \p Idx of a load interleave group. | ||||||
static bool canNarrowLoad(VPWidenRecipe *WideMember0, VPWidenRecipe *WideMember, | ||||||
VPValue *V, unsigned Idx) { | ||||||
auto *DefR = V->getDefiningRecipe(); | ||||||
// A value without a defining recipe cannot be narrowed. | ||||||
if (!DefR) | ||||||
return false; | ||||||
// A wide load qualifies only if it is unmasked and V is used at the same | ||||||
// operand positions by WideMember0 and WideMember. | ||||||
if (auto *W = dyn_cast<VPWidenLoadRecipe>(DefR)) | ||||||
return !W->getMask() && | ||||||
all_of(zip(WideMember0->operands(), WideMember->operands()), | ||||||
[V](const auto P) { | ||||||
// V must be at the same places in both WideMember0 and | ||||||
// WideMember. | ||||||
const auto &[WideMember0Op, WideMemberOp] = P; | ||||||
return (WideMember0Op == V) == (WideMemberOp == V); | ||||||
}); | ||||||
|
||||||
// An interleave group qualifies only if its factor equals its number of | ||||||
// members and V is the value the group defines at Idx. | ||||||
if (auto *IR = dyn_cast<VPInterleaveRecipe>(DefR)) | ||||||
return IR->getInterleaveGroup()->getFactor() == | ||||||
IR->getInterleaveGroup()->getNumMembers() && | ||||||
IR->getVPValue(Idx) == V; | ||||||
return false; | ||||||
} | ||||||
|
||||||
/// Returns true if \p IR is a full interleave group with factor and number of | ||||||
/// members both equal to \p VF. The interleave group must also access the full | ||||||
/// vector width \p VectorRegWidth. | ||||||
|
@@ -2298,6 +2328,8 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, | |||||
if (R.mayWriteToMemory() && !InterleaveR) | ||||||
return; | ||||||
|
||||||
// All other ops are allowed, but we reject uses that cannot be converted | ||||||
// when checking all allowed consumers (store interleave groups) below. | ||||||
if (!InterleaveR) | ||||||
continue; | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All non-IR loads are allowed, but all IR loads must be consecutive. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, we are checking all consumers, add comment, thanks! |
||||||
|
@@ -2312,7 +2344,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, | |||||
|
||||||
// For now, we only support full interleave groups storing load interleave | ||||||
// groups. | ||||||
if (!all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) { | ||||||
if (all_of(enumerate(InterleaveR->getStoredValues()), [](auto Op) { | ||||||
VPRecipeBase *DefR = Op.value()->getDefiningRecipe(); | ||||||
if (!DefR) | ||||||
return false; | ||||||
|
@@ -2322,31 +2354,67 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF, | |||||
IR->getInterleaveGroup()->getNumMembers() && | ||||||
IR->getVPValue(Op.index()) == Op.value(); | ||||||
})) { | ||||||
StoreGroups.push_back(InterleaveR); | ||||||
continue; | ||||||
} | ||||||
|
||||||
// Check if all values feeding InterleaveR are matching wide recipes whose | ||||||
// operands can be narrowed. | ||||||
auto *WideMember0 = dyn_cast_or_null<VPWidenRecipe>( | ||||||
InterleaveR->getStoredValues()[0]->getDefiningRecipe()); | ||||||
if (!WideMember0) | ||||||
return; | ||||||
for (const auto &[I, V] : enumerate(InterleaveR->getStoredValues())) { | ||||||
auto *R = dyn_cast<VPWidenRecipe>(V->getDefiningRecipe()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At this point, it must have a defining recipe, as guaranteed by canNarrowLoad. To be generalized in the future. |
||||||
if (!R || R->getOpcode() != WideMember0->getOpcode() || | ||||||
R->getNumOperands() > 2) | ||||||
return; | ||||||
if (any_of(R->operands(), [WideMember0, Idx = I, R](VPValue *V) { | ||||||
return !canNarrowLoad(WideMember0, R, V, Idx); | ||||||
})) | ||||||
return; | ||||||
} | ||||||
StoreGroups.push_back(InterleaveR); | ||||||
} | ||||||
|
||||||
if (StoreGroups.empty()) | ||||||
return; | ||||||
|
||||||
// Convert InterleaveGroup R to a single VPWidenLoadRecipe. | ||||||
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe. | ||||||
auto NarrowOp = [](VPRecipeBase *R) -> VPValue * { | ||||||
auto *LoadGroup = cast<VPInterleaveRecipe>(R); | ||||||
// Narrow interleave group to wide load, as transformed VPlan will only | ||||||
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) { | ||||||
// Narrow interleave group to wide load, as transformed VPlan will only | ||||||
// process one original iteration. | ||||||
auto *L = new VPWidenLoadRecipe( | ||||||
*cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()), | ||||||
LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, | ||||||
/*Reverse=*/false, LoadGroup->getDebugLoc()); | ||||||
L->insertBefore(LoadGroup); | ||||||
return L; | ||||||
} | ||||||
|
||||||
auto *WideLoad = cast<VPWidenLoadRecipe>(R); | ||||||
|
||||||
// Narrow wide load to uniform scalar load, as transformed VPlan will only | ||||||
// process one original iteration. | ||||||
auto *L = new VPWidenLoadRecipe( | ||||||
*cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos()), | ||||||
LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true, | ||||||
/*Reverse=*/false, LoadGroup->getDebugLoc()); | ||||||
L->insertBefore(LoadGroup); | ||||||
return L; | ||||||
auto *N = new VPReplicateRecipe(&WideLoad->getIngredient(), | ||||||
WideLoad->operands(), /*IsUniform*/ true); | ||||||
N->insertBefore(WideLoad); | ||||||
return N; | ||||||
}; | ||||||
|
||||||
// Narrow operation tree rooted at store groups. | ||||||
for (auto *StoreGroup : StoreGroups) { | ||||||
VPValue *Res = | ||||||
NarrowOp(StoreGroup->getStoredValues()[0]->getDefiningRecipe()); | ||||||
VPValue *Res = nullptr; | ||||||
if (auto *WideMember0 = dyn_cast<VPWidenRecipe>( | ||||||
StoreGroup->getStoredValues()[0]->getDefiningRecipe())) { | ||||||
for (unsigned Idx = 0, E = WideMember0->getNumOperands(); Idx != E; ++Idx) | ||||||
WideMember0->setOperand( | ||||||
Idx, NarrowOp(WideMember0->getOperand(Idx)->getDefiningRecipe())); | ||||||
Res = WideMember0; | ||||||
} else { | ||||||
Res = NarrowOp(StoreGroup->getStoredValues()[0]->getDefiningRecipe()); | ||||||
} | ||||||
|
||||||
auto *S = new VPWidenStoreRecipe( | ||||||
*cast<StoreInst>(StoreGroup->getInterleaveGroup()->getInsertPos()), | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All IRs are checked to be "consecutive" when collecting StoreGroups in narrowInterleaveGroups(), including all load IRs. So here the IR's factor is known to be equal to |members|. Moreover, both are known to be equal to VF, implying that the IR defines at least Idx + 1 values, so it's OK to call getVPValue(Idx).