diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index c4d5459d25092..752313ab15858 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -899,14 +899,20 @@ class TargetTransformInfo { bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const; + /// Identifies if the vector form of the intrinsic has a scalar operand. bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const; /// Identifies if the vector form of the intrinsic is overloaded on the type /// of the operand at index \p OpdIdx, or on the return type if \p OpdIdx is /// -1. - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) const; + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) const; + + /// Identifies if the vector form of the intrinsic that returns a struct is + /// overloaded at the struct element index \p RetIdx. + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) const; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the demanded result elements need to be inserted and/or @@ -2002,8 +2008,11 @@ class TargetTransformInfo::Concept { virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) = 0; virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) = 0; - virtual bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) = 0; + virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) = 0; + virtual bool + isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) = 0; virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TargetCostKind CostKind, @@ -2580,9 +2589,14 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { return Impl.isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); } - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) override { - return Impl.isVectorIntrinsicWithOverloadTypeAtArg(ID, ScalarOpdIdx); + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) override { + return Impl.isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx); + } + + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) override { + return Impl.isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx); } InstructionCost getScalarizationOverhead(VectorType *Ty, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 48ebffff8cbfc..9c74b2a0c31df 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -396,9 +396,14 @@ class TargetTransformInfoImplBase { return false; } - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) const { - return ScalarOpdIdx == -1; + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) const { + return OpdIdx == -1; + } + + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) const { + return RetIdx == 0; } InstructionCost getScalarizationOverhead(VectorType *Ty, diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 
c1016dd7bdddb..afe86723c5722 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -144,11 +144,25 @@ inline Type *ToVectorTy(Type *Scalar, unsigned VF) { /// This method returns true if the intrinsic's argument types are all scalars /// for the scalar form of the intrinsic and all vectors (or scalars handled by /// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic. +/// +/// Note: isTriviallyVectorizable implies isTriviallyScalarizable. bool isTriviallyVectorizable(Intrinsic::ID ID); +/// Identify if the intrinsic is trivially scalarizable. +/// This method returns true using the same predicates as +/// isTriviallyVectorizable. +/// +/// Note: There are intrinsics where implementing vectorization for the +/// intrinsic is redundant, but we want to implement scalarization of the +/// vector. To avoid requiring that such an intrinsic also implement +/// vectorization, we provide this separate function. +bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI); + /// Identifies if the vector form of the intrinsic has a scalar operand. +/// \p TTI is used to consider target-specific intrinsics; if target-specific +/// intrinsics are not to be considered, it is appropriate to pass in nullptr. -bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, - unsigned ScalarOpdIdx); +bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, + const TargetTransformInfo *TTI); /// Identifies if the vector form of the intrinsic is overloaded on the type of /// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1. @@ -158,9 +172,11 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI); /// Identifies if the vector form of the intrinsic that returns a struct is -/// overloaded at the struct element index \p RetIdx. -bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, - int RetIdx); +/// overloaded at the struct element index \p RetIdx. \p TTI is used to +/// consider target-specific intrinsics; if target-specific intrinsics +/// are not to be considered, it is appropriate to pass in nullptr. +bool isVectorIntrinsicWithStructReturnOverloadAtField( + Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI); /// Returns intrinsic ID for call. /// For the input call instruction it finds mapping intrinsic and returns diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 8eef8ea665a26..ed4541f66740e 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -819,9 +819,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { return false; } - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) const { - return ScalarOpdIdx == -1; + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) const { + return OpdIdx == -1; + } + + bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, + int RetIdx) const { + return RetIdx == 0; } /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 3d5022e5502e2..88533f2972fa6 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3447,7 +3447,7 @@ static Constant *ConstantFoldFixedVectorCall( // Gather a column of constants.
for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) { // Some intrinsics use a scalar type for certain arguments. - if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J)) { + if (isVectorIntrinsicWithScalarOpAtArg(IntrinsicID, J, /*TTI=*/nullptr)) { Lane[J] = Operands[J]; continue; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index d4b6c08c5a32b..b32dffa9f0fe8 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -615,9 +615,14 @@ bool TargetTransformInfo::isTargetIntrinsicWithScalarOpAtArg( return TTIImpl->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); } -bool TargetTransformInfo::isVectorIntrinsicWithOverloadTypeAtArg( - Intrinsic::ID ID, int ScalarOpdIdx) const { - return TTIImpl->isVectorIntrinsicWithOverloadTypeAtArg(ID, ScalarOpdIdx); +bool TargetTransformInfo::isTargetIntrinsicWithOverloadTypeAtArg( + Intrinsic::ID ID, int OpdIdx) const { + return TTIImpl->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx); +} + +bool TargetTransformInfo::isTargetIntrinsicWithStructReturnOverloadAtField( + Intrinsic::ID ID, int RetIdx) const { + return TTIImpl->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx); } InstructionCost TargetTransformInfo::getScalarizationOverhead( diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 19306679cf6db..6c2502ce21cca 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -113,9 +113,31 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) { } } +bool llvm::isTriviallyScalarizable(Intrinsic::ID ID, + const TargetTransformInfo *TTI) { + if (isTriviallyVectorizable(ID)) + return true; + + if (TTI && Intrinsic::isTargetIntrinsic(ID)) + return TTI->isTargetIntrinsicTriviallyScalarizable(ID); + + // TODO: Move frexp to isTriviallyVectorizable. + // https://github.com/llvm/llvm-project/issues/112408 + switch (ID) { + case Intrinsic::frexp: + return true; + } + return false; +} + /// Identifies if the vector form of the intrinsic has a scalar operand.
bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, - unsigned ScalarOpdIdx) { + unsigned ScalarOpdIdx, + const TargetTransformInfo *TTI) { + + if (TTI && Intrinsic::isTargetIntrinsic(ID)) + return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx); + switch (ID) { case Intrinsic::abs: case Intrinsic::vp_abs: @@ -142,7 +164,7 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg( assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!"); if (TTI && Intrinsic::isTargetIntrinsic(ID)) - return TTI->isVectorIntrinsicWithOverloadTypeAtArg(ID, OpdIdx); + return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx); if (VPCastIntrinsic::isVPCast(ID)) return OpdIdx == -1 || OpdIdx == 0; @@ -167,8 +189,12 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg( } } -bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, - int RetIdx) { +bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField( + Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) { + + if (TTI && Intrinsic::isTargetIntrinsic(ID)) + return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx); + switch (ID) { case Intrinsic::frexp: return RetIdx == 0 || RetIdx == 1; diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp index 8d457f58e6eed..a87c2063b1e35 100644 --- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp +++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp @@ -121,7 +121,7 @@ static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI, auto *ArgTy = Arg.value()->getType(); bool IsOloadTy = isVectorIntrinsicWithOverloadTypeAtArg(IID, Arg.index(), /*TTI=*/nullptr); - if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index())) { + if (isVectorIntrinsicWithScalarOpAtArg(IID, Arg.index(), /*TTI=*/nullptr)) { ScalarArgTypes.push_back(ArgTy); if (IsOloadTy) OloadTys.push_back(ArgTy); diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp index 2ca4e23594d56..4be1326085bc0 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp @@ -25,13 +25,13 @@ bool DirectXTTIImpl::isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, } } -bool DirectXTTIImpl::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx) { +bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, + int OpdIdx) { switch (ID) { case Intrinsic::dx_asdouble: - return ScalarOpdIdx == 0; + return OpdIdx == 0; default: - return ScalarOpdIdx == -1; + return OpdIdx == -1; } } diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h index a18e4a2862575..992d0483de93e 100644 --- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h +++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.h @@ -37,8 +37,7 @@ class DirectXTTIImpl : public BasicTTIImplBase { bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const; bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx); - bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, - int ScalarOpdIdx); + bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp index 3b701e6ca0976..2b27150112ad8 100644 --- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp +++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp @@ -279,8 +279,6 @@ class 
ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { bool visit(Function &F); - bool isTriviallyScalarizable(Intrinsic::ID ID); - // InstVisitor methods. They return true if the instruction was scalarized, // false if nothing changed. bool visitInstruction(Instruction &I) { return false; } @@ -683,19 +681,6 @@ bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { return true; } -bool ScalarizerVisitor::isTriviallyScalarizable(Intrinsic::ID ID) { - if (isTriviallyVectorizable(ID)) - return true; - // TODO: Move frexp to isTriviallyVectorizable. - // https://github.com/llvm/llvm-project/issues/112408 - switch (ID) { - case Intrinsic::frexp: - return true; - } - return Intrinsic::isTargetIntrinsic(ID) && - TTI->isTargetIntrinsicTriviallyScalarizable(ID); -} - /// If a call to a vector typed intrinsic function, split into a scalar call per /// element if possible for the intrinsic. bool ScalarizerVisitor::splitCall(CallInst &CI) { @@ -715,7 +700,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { Intrinsic::ID ID = F->getIntrinsicID(); - if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID)) + if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID, TTI)) return false; // unsigned NumElems = VT->getNumElements(); @@ -743,7 +728,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { // will only scalarize when the struct elements have the same bitness. if (!CurrVS || CurrVS->NumPacked != VS->NumPacked) return false; - if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I)) + if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I, TTI)) Tys.push_back(CurrVS->SplitTy); } } @@ -794,8 +779,7 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) { Tys[0] = VS->RemainderTy; for (unsigned J = 0; J != NumArgs; ++J) { - if (isVectorIntrinsicWithScalarOpAtArg(ID, J) || - TTI->isTargetIntrinsicWithScalarOpAtArg(ID, J)) { + if (isVectorIntrinsicWithScalarOpAtArg(ID, J, TTI)) { ScalarCallOps.push_back(ScalarOperands[J]); } else { ScalarCallOps.push_back(Scattered[J][I]); } @@ -1089,7 +1073,7 @@ bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) { if (!F) return false; Intrinsic::ID ID = F->getIntrinsicID(); - if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID)) + if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID, TTI)) return false; // Note: Fall through means Operand is a`CallInst` and it is defined in // `isTriviallyScalarizable`.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 555c8435dd330..3ced290237b3a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -927,7 +927,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { auto *SE = PSE.getSE(); Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx) - if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx)) { + if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) { if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) { reportVectorizationFailure("Found unvectorizable intrinsic", diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3bd983ee6e125..2e2c8db2307c2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1091,7 +1091,8 @@ static bool allSameType(ArrayRef VL) { /// \returns True if in-tree use also needs extract. This refers to /// possible scalar operand in vectorized instruction. static bool doesInTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, - TargetLibraryInfo *TLI) { + TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI) { if (!UserInst) return false; unsigned Opcode = UserInst->getOpcode(); @@ -1108,7 +1109,7 @@ static bool doesInTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, CallInst *CI = cast(UserInst); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); return any_of(enumerate(CI->args()), [&](auto &&Arg) { - return isVectorIntrinsicWithScalarOpAtArg(ID, Arg.index()) && + return isVectorIntrinsicWithScalarOpAtArg(ID, Arg.index(), TTI) && Arg.value().get() == Scalar; }); } @@ -6505,7 +6506,7 @@ void BoUpSLP::buildExternalUses( // be used. if (UseEntry->State == TreeEntry::ScatterVectorize || !doesInTreeUserNeedToExtract( - Scalar, getRootEntryInstruction(*UseEntry), TLI)) { + Scalar, getRootEntryInstruction(*UseEntry), TLI, TTI)) { LLVM_DEBUG(dbgs() << "SLP: \tInternal user will be removed:" << *U << ".\n"); assert(!UseEntry->isGather() && "Bad state"); @@ -7830,7 +7831,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( unsigned NumArgs = CI->arg_size(); SmallVector ScalarArgs(NumArgs, nullptr); for (unsigned J = 0; J != NumArgs; ++J) - if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) + if (isVectorIntrinsicWithScalarOpAtArg(ID, J, TTI)) ScalarArgs[J] = CI->getArgOperand(J); for (Value *V : VL) { CallInst *CI2 = dyn_cast(V); @@ -7846,7 +7847,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( // Some intrinsics have scalar arguments and should be same in order for // them to be vectorized. for (unsigned J = 0; J != NumArgs; ++J) { - if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) { + if (isVectorIntrinsicWithScalarOpAtArg(ID, J, TTI)) { Value *A1J = CI2->getArgOperand(J); if (ScalarArgs[J] != A1J) { LLVM_DEBUG(dbgs() @@ -8718,7 +8719,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, for (unsigned I : seq(CI->arg_size())) { // For scalar operands no need to create an entry since no need to // vectorize it. 
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I)) + if (isVectorIntrinsicWithScalarOpAtArg(ID, I, TTI)) continue; buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I}); } @@ -10948,14 +10949,14 @@ TTI::CastContextHint BoUpSLP::getCastContextHint(const TreeEntry &TE) const { /// Builds the arguments types vector for the given call instruction with the /// given \p ID for the specified vector factor. -static SmallVector buildIntrinsicArgTypes(const CallInst *CI, - const Intrinsic::ID ID, - const unsigned VF, - unsigned MinBW) { +static SmallVector +buildIntrinsicArgTypes(const CallInst *CI, const Intrinsic::ID ID, + const unsigned VF, unsigned MinBW, + const TargetTransformInfo *TTI) { SmallVector ArgTys; for (auto [Idx, Arg] : enumerate(CI->args())) { if (ID != Intrinsic::not_intrinsic) { - if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx)) { + if (isVectorIntrinsicWithScalarOpAtArg(ID, Idx, TTI)) { ArgTys.push_back(Arg->getType()); continue; } @@ -11657,9 +11658,9 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, auto GetVectorCost = [=](InstructionCost CommonCost) { auto *CI = cast(VL0); Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - SmallVector ArgTys = - buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(), - It != MinBWs.end() ? It->second.first : 0); + SmallVector ArgTys = buildIntrinsicArgTypes( + CI, ID, VecTy->getNumElements(), + It != MinBWs.end() ? It->second.first : 0, TTI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys); return std::min(VecCallCosts.first, VecCallCosts.second) + CommonCost; }; @@ -15817,9 +15818,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - SmallVector ArgTys = - buildIntrinsicArgTypes(CI, ID, VecTy->getNumElements(), - It != MinBWs.end() ? It->second.first : 0); + SmallVector ArgTys = buildIntrinsicArgTypes( + CI, ID, VecTy->getNumElements(), + It != MinBWs.end() ? It->second.first : 0, TTI); auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI, ArgTys); bool UseIntrinsic = ID != Intrinsic::not_intrinsic && VecCallCosts.first <= VecCallCosts.second; @@ -15835,7 +15836,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { ValueList OpVL; // Some intrinsics have scalar arguments. This argument should not be // vectorized. - if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I)) { + if (UseIntrinsic && isVectorIntrinsicWithScalarOpAtArg(ID, I, TTI)) { ScalarArg = CEI->getArgOperand(I); // if decided to reduce bitwidth of abs intrinsic, it second argument // must be set false (do not return poison, if value issigned min). @@ -16374,7 +16375,7 @@ BoUpSLP::vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues, E->State == TreeEntry::StridedVectorize) && doesInTreeUserNeedToExtract( Scalar, getRootEntryInstruction(*UseEntry), - TLI); + TLI, TTI); })) && "Scalar with nullptr User must be registered in " "ExternallyUsedValues map or remain as scalar in vectorized " @@ -17870,7 +17871,8 @@ bool BoUpSLP::collectValuesToDemote( // Choose the best bitwidth based on cost estimations. 
auto Checker = [&](unsigned BitWidth, unsigned) { unsigned MinBW = PowerOf2Ceil(BitWidth); - SmallVector ArgTys = buildIntrinsicArgTypes(IC, ID, VF, MinBW); + SmallVector ArgTys = + buildIntrinsicArgTypes(IC, ID, VF, MinBW, TTI); auto VecCallCosts = getVectorCallCosts( IC, getWidenedType(IntegerType::get(IC->getContext(), MinBW), VF), TTI, TLI, ArgTys); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 02774d8e5c5fe..61187c9f6f1a0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -971,7 +971,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { // Some intrinsics have a scalar argument - don't replace it with a // vector. Value *Arg; - if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index())) + if (isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index(), + State.TTI)) Arg = State.get(I.value(), VPLane(0)); else Arg = State.get(I.value(), onlyFirstLaneUsed(I.value())); diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 09489e2498453..16446e7174c22 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -1950,7 +1950,7 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) { return false; for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) - if (isVectorIntrinsicWithScalarOpAtArg(IID, I) && + if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI) && II0->getArgOperand(I) != II1->getArgOperand(I)) return false; @@ -1963,7 +1963,7 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) { SmallVector NewArgsTy; InstructionCost NewCost = 0; for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) - if (isVectorIntrinsicWithScalarOpAtArg(IID, I)) { + if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) { NewArgsTy.push_back(II0->getArgOperand(I)->getType()); } else { auto *VecTy = cast(II0->getArgOperand(I)->getType()); @@ -1984,7 +1984,7 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) { SmallVector NewArgs; for (unsigned I = 0, E = II0->arg_size(); I != E; ++I) - if (isVectorIntrinsicWithScalarOpAtArg(IID, I)) { + if (isVectorIntrinsicWithScalarOpAtArg(IID, I, &TTI)) { NewArgs.push_back(II0->getArgOperand(I)); } else { Value *Shuf = Builder.CreateShuffleVector(II0->getArgOperand(I), @@ -2075,7 +2075,8 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, const SmallPtrSet &IdentityLeafs, const SmallPtrSet &SplatLeafs, const SmallPtrSet &ConcatLeafs, - IRBuilder<> &Builder) { + IRBuilder<> &Builder, + const TargetTransformInfo *TTI) { auto [FrontU, FrontLane] = Item.front(); if (IdentityLeafs.contains(FrontU)) { @@ -2110,13 +2111,14 @@ static Value *generateNewInstTree(ArrayRef Item, FixedVectorType *Ty, unsigned NumOps = I->getNumOperands() - (II ? 
1 : 0); SmallVector Ops(NumOps); for (unsigned Idx = 0; Idx < NumOps; Idx++) { - if (II && isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx)) { + if (II && + isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Idx, TTI)) { Ops[Idx] = II->getOperand(Idx); continue; } - Ops[Idx] = - generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), Ty, - IdentityLeafs, SplatLeafs, ConcatLeafs, Builder); + Ops[Idx] = generateNewInstTree(generateInstLaneVectorFromOperand(Item, Idx), + Ty, IdentityLeafs, SplatLeafs, ConcatLeafs, + Builder, TTI); } SmallVector ValueList; @@ -2288,7 +2290,8 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { II && isTriviallyVectorizable(II->getIntrinsicID()) && !II->hasOperandBundles()) { for (unsigned Op = 0, E = II->getNumOperands() - 1; Op < E; Op++) { - if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op)) { + if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(), Op, + &TTI)) { if (!all_of(drop_begin(Item), [Item, Op](InstLane &IL) { Value *FrontV = Item.front().first->get(); Use *U = IL.first; @@ -2319,7 +2322,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) { // removed. Scan through again and generate the new tree of instructions. Builder.SetInsertPoint(&I); Value *V = generateNewInstTree(Start, Ty, IdentityLeafs, SplatLeafs, - ConcatLeafs, Builder); + ConcatLeafs, Builder, &TTI); replaceValue(I, *V); return true; }
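Note (not part of the patch): the DirectX changes above show the override pattern for the renamed target hooks. As a rough sketch of how another target could opt its own intrinsics into the Scalarizer through the new isTriviallyScalarizable(ID, TTI) path, the fragment below shadows the BasicTTIImplBase defaults. MyTTIImpl and Intrinsic::my_intrinsic are hypothetical placeholders, not identifiers from this patch, and the usual TTI constructor/getST()/getTLI() plumbing is elided.

// Sketch only: MyTTIImpl and Intrinsic::my_intrinsic are hypothetical.
#include "llvm/CodeGen/BasicTTIImpl.h"

using namespace llvm;

class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> {
  // Constructor and the usual getST()/getTLI() plumbing elided.
public:
  // Lets isTriviallyScalarizable(ID, TTI) return true so the Scalarizer
  // splits calls to this target intrinsic element by element.
  bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const {
    return ID == Intrinsic::my_intrinsic;
  }

  // Operand 1 stays scalar when the call is scalarized or widened.
  bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                          unsigned ScalarOpdIdx) const {
    return ID == Intrinsic::my_intrinsic && ScalarOpdIdx == 1;
  }

  // Overloaded on operand 0 rather than (only) on the return type.
  bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                              int OpdIdx) const {
    if (ID == Intrinsic::my_intrinsic)
      return OpdIdx == 0;
    return OpdIdx == -1;
  }

  // Struct-returning intrinsics: only field 0 appears in the overload list,
  // matching the BasicTTIImplBase default.
  bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID,
                                                        int RetIdx) const {
    return RetIdx == 0;
  }
};

Passes reach these hooks through the VectorUtils helpers by forwarding their TargetTransformInfo pointer, or nullptr when target intrinsics should be ignored, as the updated call sites in Scalarizer, SLPVectorizer, VectorCombine, and VPlanRecipes do above.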