Skip to content

Commit 3d36e5c

Browse files
committed
Only promote args when function attributes are compatible
Summary: Check to make sure that the caller and the callee have compatible function arguments before promoting arguments. This uses the same TargetTransformInfo queries that are used to determine if attributes are compatible for inlining. The goal here is to avoid breaking ABI when a called function's ABI depends on a target feature that is not enabled in the caller. This is a very conservative fix for PR37358. Ideally we would have a more sophisticated check for ABI compatibility rather than checking if the attributes are compatible for inlining. Reviewers: echristo, chandlerc, eli.friedman, craig.topper Reviewed By: echristo, chandlerc Subscribers: nikic, xbolva00, rkruppe, alexcrichton, llvm-commits Differential Revision: https://reviews.llvm.org/D53554 llvm-svn: 351296
1 parent a5b0e55 commit 3d36e5c

File tree

5 files changed

+114
-4
lines changed

5 files changed

+114
-4
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

+16
Original file line numberDiff line numberDiff line change
@@ -934,6 +934,14 @@ class TargetTransformInfo {
934934
bool areInlineCompatible(const Function *Caller,
935935
const Function *Callee) const;
936936

937+
/// \returns True if the caller and callee agree on how \p Args will be passed
938+
/// to the callee.
939+
/// \param[out] Args The list of compatible arguments. The implementation may
940+
/// filter out any incompatible args from this list.
941+
bool areFunctionArgsABICompatible(const Function *Caller,
942+
const Function *Callee,
943+
SmallPtrSetImpl<Argument *> &Args) const;
944+
937945
/// The type of load/store indexing.
938946
enum MemIndexedMode {
939947
MIM_Unindexed, ///< No indexing.
@@ -1179,6 +1187,9 @@ class TargetTransformInfo::Concept {
11791187
unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
11801188
virtual bool areInlineCompatible(const Function *Caller,
11811189
const Function *Callee) const = 0;
1190+
virtual bool
1191+
areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
1192+
SmallPtrSetImpl<Argument *> &Args) const = 0;
11821193
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
11831194
virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
11841195
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
@@ -1557,6 +1568,11 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
15571568
const Function *Callee) const override {
15581569
return Impl.areInlineCompatible(Caller, Callee);
15591570
}
1571+
/// Model override of the ABI-compatibility query: passes \p Caller,
/// \p Callee and \p Args straight through to the wrapped Impl object and
/// returns its answer unchanged.
bool areFunctionArgsABICompatible(
1572+
const Function *Caller, const Function *Callee,
1573+
SmallPtrSetImpl<Argument *> &Args) const override {
1574+
return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);
1575+
}
15601576
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
15611577
return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
15621578
}

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

+8
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,14 @@ class TargetTransformInfoImplBase {
526526
Callee->getFnAttribute("target-features"));
527527
}
528528

529+
/// Base implementation of the argument ABI-compatibility query.
///
/// \returns true only when \p Caller and \p Callee carry equal "target-cpu"
/// function attributes and equal "target-features" function attributes.
/// \p Args is neither read nor modified by this implementation; the answer
/// depends solely on the two functions' attributes.
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
530+
SmallPtrSetImpl<Argument *> &Args) const {
531+
// Both attribute comparisons must hold; a mismatch in either one yields false.
return (Caller->getFnAttribute("target-cpu") ==
532+
Callee->getFnAttribute("target-cpu")) &&
533+
(Caller->getFnAttribute("target-features") ==
534+
Callee->getFnAttribute("target-features"));
535+
}
536+
529537
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
530538
const DataLayout &DL) const {
531539
return false;

llvm/lib/Analysis/TargetTransformInfo.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,12 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
625625
return TTIImpl->areInlineCompatible(Caller, Callee);
626626
}
627627

628+
// Public entry point for the argument ABI-compatibility query: delegates to
// TTIImpl with the same Caller, Callee and Args and returns its result.
bool TargetTransformInfo::areFunctionArgsABICompatible(
629+
const Function *Caller, const Function *Callee,
630+
SmallPtrSetImpl<Argument *> &Args) const {
631+
return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
632+
}
633+
628634
bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
629635
Type *Ty) const {
630636
return TTIImpl->isIndexedLoadLegal(Mode, Ty);

llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

+31-4
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
#include "llvm/Analysis/Loads.h"
5050
#include "llvm/Analysis/MemoryLocation.h"
5151
#include "llvm/Analysis/TargetLibraryInfo.h"
52+
#include "llvm/Analysis/TargetTransformInfo.h"
5253
#include "llvm/IR/Argument.h"
5354
#include "llvm/IR/Attributes.h"
5455
#include "llvm/IR/BasicBlock.h"
@@ -809,6 +810,21 @@ static bool canPaddingBeAccessed(Argument *arg) {
809810
return false;
810811
}
811812

813+
/// Checks every use of \p F and asks \p TTI whether the calling function and
/// the called function are ABI-compatible for both candidate argument sets.
///
/// \param F The function whose uses are inspected.
/// \param TTI Target information used to answer the compatibility query.
/// \param ArgsToPromote First candidate set passed to the TTI query.
/// \param ByValArgsToTransform Second candidate set passed to the TTI query.
/// \returns false as soon as either query fails for any use of \p F; true if
/// all uses pass (including the case where \p F has no uses).
static bool areFunctionArgsABICompatible(
814+
const Function &F, const TargetTransformInfo &TTI,
815+
SmallPtrSetImpl<Argument *> &ArgsToPromote,
816+
SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
817+
for (const Use &U : F.uses()) {
818+
// NOTE(review): the user is wrapped in a CallSite without a validity check;
// presumably this relies on promoteArguments having already rejected
// functions with non-direct-call uses before calling here -- confirm.
CallSite CS(U.getUser());
819+
const Function *Caller = CS.getCaller();
820+
const Function *Callee = CS.getCalledFunction();
821+
// The two sets are queried separately; the second query is skipped when the
// first already reports an incompatibility.
if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
822+
!TTI.areFunctionArgsABICompatible(Caller, Callee, ByValArgsToTransform))
823+
return false;
824+
}
825+
return true;
826+
}
827+
812828
/// PromoteArguments - This method checks the specified function to see if there
813829
/// are any promotable arguments and if it is safe to promote the function (for
814830
/// example, all callers are direct). If safe to promote some arguments, it
@@ -817,7 +833,8 @@ static Function *
817833
promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
818834
unsigned MaxElements,
819835
Optional<function_ref<void(CallSite OldCS, CallSite NewCS)>>
820-
ReplaceCallSite) {
836+
ReplaceCallSite,
837+
const TargetTransformInfo &TTI) {
821838
// Don't perform argument promotion for naked functions; otherwise we can end
822839
// up removing parameters that are seemingly 'not used' as they are referred
823840
// to in the assembly.
@@ -846,7 +863,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
846863

847864
// Second check: make sure that all callers are direct callers. We can't
848865
// transform functions that have indirect callers. Also see if the function
849-
// is self-recursive.
866+
// is self-recursive and check that target features are compatible.
850867
bool isSelfRecursive = false;
851868
for (Use &U : F->uses()) {
852869
CallSite CS(U.getUser());
@@ -955,6 +972,10 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
955972
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
956973
return nullptr;
957974

975+
if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
976+
ByValArgsToTransform))
977+
return nullptr;
978+
958979
return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);
959980
}
960981

@@ -980,7 +1001,9 @@ PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,
9801001
return FAM.getResult<AAManager>(F);
9811002
};
9821003

983-
Function *NewF = promoteArguments(&OldF, AARGetter, MaxElements, None);
1004+
const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(OldF);
1005+
Function *NewF =
1006+
promoteArguments(&OldF, AARGetter, MaxElements, None, TTI);
9841007
if (!NewF)
9851008
continue;
9861009
LocalChange = true;
@@ -1018,6 +1041,7 @@ struct ArgPromotion : public CallGraphSCCPass {
10181041
void getAnalysisUsage(AnalysisUsage &AU) const override {
10191042
AU.addRequired<AssumptionCacheTracker>();
10201043
AU.addRequired<TargetLibraryInfoWrapperPass>();
1044+
AU.addRequired<TargetTransformInfoWrapperPass>();
10211045
getAAResultsAnalysisUsage(AU);
10221046
CallGraphSCCPass::getAnalysisUsage(AU);
10231047
}
@@ -1043,6 +1067,7 @@ INITIALIZE_PASS_BEGIN(ArgPromotion, "argpromotion",
10431067
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
10441068
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
10451069
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
1070+
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
10461071
INITIALIZE_PASS_END(ArgPromotion, "argpromotion",
10471072
"Promote 'by reference' arguments to scalars", false, false)
10481073

@@ -1079,8 +1104,10 @@ bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
10791104
CallerNode->replaceCallEdge(OldCS, NewCS, NewCalleeNode);
10801105
};
10811106

1107+
const TargetTransformInfo &TTI =
1108+
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*OldF);
10821109
if (Function *NewF = promoteArguments(OldF, AARGetter, MaxElements,
1083-
{ReplaceCallSite})) {
1110+
{ReplaceCallSite}, TTI)) {
10841111
LocalChange = true;
10851112

10861113
// Update the call graph for the newly promoted function.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
; RUN: opt -S -argpromotion < %s | FileCheck %s
2+
; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
3+
; Test that we only promote arguments when the caller/callee have compatible
4+
; function attributes.
5+
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
; CHECK-LABEL: @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1)
9+
; Negative-case callee: uses attribute group #0 ("+avx2") and copies the
; <4 x i64> loaded from %arg1 into %arg. Its caller in this file, @no_promote,
; uses attribute group #1, which sets no target features, and the FileCheck
; label directive for this function expects the pointer signature unchanged.
define internal fastcc void @no_promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
10+
bb:
11+
%tmp = load <4 x i64>, <4 x i64>* %arg1
12+
store <4 x i64> %tmp, <4 x i64>* %arg
13+
ret void
14+
}
15+
16+
; Caller for the negative case: uses attribute group #1 (no "target-features")
; while calling @no_promote_avx2, which uses group #0 ("+avx2").
; It zero-fills one stack vector with memset, passes it as the read-only source
; and a second stack vector as the destination, then stores the result to %arg.
define void @no_promote(<4 x i64>* %arg) #1 {
17+
bb:
18+
%tmp = alloca <4 x i64>, align 32
19+
%tmp2 = alloca <4 x i64>, align 32
20+
%tmp3 = bitcast <4 x i64>* %tmp to i8*
21+
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
22+
call fastcc void @no_promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
23+
%tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
24+
store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
25+
ret void
26+
}
}
27+
28+
; CHECK-LABEL: @promote_avx2(<4 x i64>* %arg, <4 x i64> %
29+
; Positive-case callee: same body as @no_promote_avx2 (load from %arg1, store
; to %arg) and the same attribute group #0 ("+avx2"). Its caller in this file,
; @promote, also uses group #0, and the FileCheck label directive for this
; function expects the second parameter to become a by-value <4 x i64>.
define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
30+
bb:
31+
%tmp = load <4 x i64>, <4 x i64>* %arg1
32+
store <4 x i64> %tmp, <4 x i64>* %arg
33+
ret void
34+
}
35+
36+
; Caller for the positive case: uses attribute group #0 ("+avx2"), the same
; group as the callee @promote_avx2.
; The body mirrors @no_promote: zero-fill a stack vector, call the callee with
; it as the source and a second stack vector as the destination, then store
; the result to %arg.
define void @promote(<4 x i64>* %arg) #0 {
37+
bb:
38+
%tmp = alloca <4 x i64>, align 32
39+
%tmp2 = alloca <4 x i64>, align 32
40+
%tmp3 = bitcast <4 x i64>* %tmp to i8*
41+
call void @llvm.memset.p0i8.i64(i8* align 32 %tmp3, i8 0, i64 32, i1 false)
42+
call fastcc void @promote_avx2(<4 x i64>* %tmp2, <4 x i64>* %tmp)
43+
%tmp4 = load <4 x i64>, <4 x i64>* %tmp2, align 32
44+
store <4 x i64> %tmp4, <4 x i64>* %arg, align 2
45+
ret void
46+
}
}
47+
48+
; Function Attrs: argmemonly nounwind
49+
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #2
50+
51+
attributes #0 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" }
52+
attributes #1 = { nounwind uwtable }
53+
attributes #2 = { argmemonly nounwind }

0 commit comments

Comments
 (0)