Skip to content

[LoopVectorizer] Allow partial reductions to be made in predicated loops #124268

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
28 changes: 18 additions & 10 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8757,13 +8757,6 @@ bool VPRecipeBuilder::getScaledReductions(
if (!CM.TheLoop->contains(RdxExitInstr))
return false;

// TODO: Allow scaling reductions when predicating. The select at
// the end of the loop chooses between the phi value and most recent
// reduction result, both of which have different VFs to the active lane
// mask when scaling.
if (CM.blockNeedsPredicationForAnyReason(RdxExitInstr->getParent()))
return false;

auto *Update = dyn_cast<BinaryOperator>(RdxExitInstr);
if (!Update)
return false;
Expand Down Expand Up @@ -8925,8 +8918,19 @@ VPRecipeBuilder::tryToCreatePartialReduction(Instruction *Reduction,
isa<VPPartialReductionRecipe>(BinOpRecipe))
std::swap(BinOp, Accumulator);

return new VPPartialReductionRecipe(Reduction->getOpcode(), BinOp,
Accumulator, Reduction);
unsigned ReductionOpcode = Reduction->getOpcode();
if (CM.blockNeedsPredicationForAnyReason(Reduction->getParent())) {
assert((ReductionOpcode == Instruction::Add ||
ReductionOpcode == Instruction::Sub) &&
"Expected an ADD or SUB operation for predicated partial "
"reductions (because the neutral element in the mask is zero)!");
VPValue *Mask = getBlockInMask(Reduction->getParent());
VPValue *Zero =
Plan.getOrAddLiveIn(ConstantInt::get(Reduction->getType(), 0));
BinOp = Builder.createSelect(Mask, BinOp, Zero, Reduction->getDebugLoc());
}
return new VPPartialReductionRecipe(ReductionOpcode, BinOp, Accumulator,
Reduction);
}

void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
Expand Down Expand Up @@ -9734,7 +9738,11 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// beginning of the dedicated latch block.
auto *OrigExitingVPV = PhiR->getBackedgeValue();
auto *NewExitingVPV = PhiR->getBackedgeValue();
if (!PhiR->isInLoop() && CM.foldTailByMasking()) {
// Don't output selects for partial reductions: their output has fewer lanes
// than the VF, so the operands of the select would have different numbers of
// lanes. Partial reductions mask the input instead.
if (!PhiR->isInLoop() && CM.foldTailByMasking() &&
!isa<VPPartialReductionRecipe>(OrigExitingVPV->getDefiningRecipe())) {
VPValue *Cond = RecipeBuilder.getBlockInMask(OrigLoop->getHeader());
assert(OrigExitingVPV->getDefiningRecipe()->getParent() != LatchVPBB &&
"reduction recipe must be defined before latch");
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,13 @@ VPPartialReductionRecipe::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
std::optional<unsigned> Opcode = std::nullopt;
VPRecipeBase *BinOpR = getOperand(0)->getDefiningRecipe();

// If the partial reduction is predicated, a select will be operand 0 rather
// than the binary op; look through the select's true-value operand (operand 1)
// to find the binary op.
using namespace llvm::VPlanPatternMatch;
if (match(getOperand(0), m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
BinOpR = BinOpR->getOperand(1)->getDefiningRecipe();

if (auto *WidenR = dyn_cast<VPWidenRecipe>(BinOpR))
Opcode = std::make_optional(WidenR->getOpcode());

Expand Down
Loading