Skip to content

Commit b6e52f7

Browse files
committed
[VPlan] Compute cost for most opcodes in VPWidenRecipe (NFCI).
1 parent ef8207b commit b6e52f7

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -862,7 +862,8 @@ class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
862862
protected:
863863
/// Compute the cost of this recipe using the legacy cost model and the
864864
/// underlying instructions.
865-
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
865+
virtual InstructionCost computeCost(ElementCount VF,
866+
VPCostContext &Ctx) const;
866867
};
867868

868869
// Helper macro to define common classof implementations for recipes.
@@ -1423,6 +1424,10 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14231424
/// Produce widened copies of all Ingredients.
14241425
void execute(VPTransformState &State) override;
14251426

1427+
/// Return the cost of this VPWidenRecipe for the vectorization factor \p VF,
/// using the cost context \p Ctx. Overrides the virtual computeCost declared
/// in the base recipe class.
1428+
InstructionCost computeCost(ElementCount VF,
1429+
VPCostContext &Ctx) const override;
1430+
14261431
unsigned getOpcode() const { return Opcode; }
14271432

14281433
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1137,6 +1137,80 @@ void VPWidenRecipe::execute(VPTransformState &State) {
11371137
#endif
11381138
}
11391139

1140+
/// Compute the cost of this widened recipe for vectorization factor \p VF,
/// using TTI::TCK_RecipThroughput as the cost kind. Most opcodes are priced
/// directly through the TTI held by \p Ctx; integer division and remainder are
/// still forwarded to the legacy cost model. Any opcode not listed below hits
/// llvm_unreachable.
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;

  // Operand description used whenever nothing more specific is known.
  const TargetTransformInfo::OperandValueInfo AnyValueInfo = {
      TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None};

  // Vector type built from the inferred scalar type of the value this recipe
  // defines, widened by VF.
  auto GetResultVectorTy = [&]() -> Type * {
    return ToVectorTy(Ctx.Types.inferScalarType(this->getVPSingleValue()), VF);
  };

  switch (Opcode) {
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
    // More complex computation, let the legacy cost-model handle this for now.
    return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);

  case Instruction::FNeg: {
    Type *WideTy = GetResultVectorTy();
    return Ctx.TTI.getArithmeticInstrCost(Opcode, WideTy, CostKind,
                                          AnyValueInfo, AnyValueInfo);
  }

  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    VPValue *RHS = getOperand(1);

    // Certain instructions can be cheaper to vectorize if they have a constant
    // second vector operand; shifts on x86 are one example. For a live-in
    // second operand, ask TTI to classify the underlying IR value.
    TargetTransformInfo::OperandValueInfo RHSInfo = AnyValueInfo;
    if (RHS->isLiveIn())
      RHSInfo = Ctx.TTI.getOperandInfo(RHS->getLiveInIRValue());

    // An operand still classified as "any value" that is defined outside the
    // vector regions is reported as uniform instead.
    const bool IsAnyValue = RHSInfo.Kind == TargetTransformInfo::OK_AnyValue;
    if (IsAnyValue && RHS->isDefinedOutsideVectorRegions())
      RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;

    Type *WideTy = GetResultVectorTy();

    // When an underlying IR instruction exists, hand it and its operands to
    // TTI as context; otherwise pass a null instruction and no operands.
    auto *UI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
    SmallVector<const Value *, 4> ContextOps;
    if (UI)
      ContextOps.append(UI->value_op_begin(), UI->value_op_end());

    return Ctx.TTI.getArithmeticInstrCost(Opcode, WideTy, CostKind,
                                          AnyValueInfo, RHSInfo, ContextOps,
                                          UI);
  }

  case Instruction::Freeze: {
    // This opcode is unknown. Assume that it is the same as 'mul'.
    Type *WideTy = GetResultVectorTy();
    return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, WideTy, CostKind);
  }

  case Instruction::ICmp:
  case Instruction::FCmp: {
    auto *UI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
    // Compares are priced on the type of the compared operand rather than on
    // the type of the compare's result.
    Type *OperandVecTy =
        ToVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);
    return Ctx.TTI.getCmpSelInstrCost(Opcode, OperandVecTy, nullptr,
                                      getPredicate(), CostKind, UI);
  }

  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
1213+
11401214
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
11411215
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
11421216
VPSlotTracker &SlotTracker) const {

0 commit comments

Comments
 (0)