Skip to content

Commit dba7329

Browse files
committed
[GC] CodeGenPrep transform: simplify offsetable relocate
The transform is somewhat involved, but the basic idea is simple: find derived pointers that have been offset from the base pointer using gep and replace the relocate of the derived pointer with a gep to the relocated base pointer (with the same offset). llvm-svn: 226060
1 parent 9ffa728 commit dba7329

File tree

3 files changed

+245
-3
lines changed

3 files changed

+245
-3
lines changed

llvm/include/llvm/IR/Statepoint.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,12 +194,12 @@ class GCRelocateOperands {
194194
/// The index into the associated statepoint's argument list
195195
/// which contains the base pointer of the pointer whose
196196
/// relocation this gc.relocate describes.
197-
int basePtrIndex() {
197+
unsigned basePtrIndex() {
198198
return cast<ConstantInt>(RelocateCS.getArgument(1))->getZExtValue();
199199
}
200200
/// The index into the associated statepoint's argument list which
201201
/// contains the pointer whose relocation this gc.relocate describes.
202-
int derivedPtrIndex() {
202+
unsigned derivedPtrIndex() {
203203
return cast<ConstantInt>(RelocateCS.getArgument(2))->getZExtValue();
204204
}
205205
Value *basePtr() {

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 155 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include "llvm/IR/IntrinsicInst.h"
3333
#include "llvm/IR/MDBuilder.h"
3434
#include "llvm/IR/PatternMatch.h"
35+
#include "llvm/IR/Statepoint.h"
3536
#include "llvm/IR/ValueHandle.h"
3637
#include "llvm/IR/ValueMap.h"
3738
#include "llvm/Pass.h"
@@ -72,6 +73,10 @@ static cl::opt<bool> DisableBranchOpts(
7273
"disable-cgp-branch-opts", cl::Hidden, cl::init(false),
7374
cl::desc("Disable branch optimizations in CodeGenPrepare"));
7475

76+
static cl::opt<bool>
77+
DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
78+
cl::desc("Disable GC optimizations in CodeGenPrepare"));
79+
7580
static cl::opt<bool> DisableSelectToBranch(
7681
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
7782
cl::desc("Disable select to branch conversion."));
@@ -183,6 +188,7 @@ class TypePromotionTransaction;
183188
const SmallVectorImpl<Instruction *> &Exts,
184189
unsigned CreatedInst);
185190
bool splitBranchCondition(Function &F);
191+
bool simplifyOffsetableRelocate(Instruction &I);
186192
};
187193
}
188194

@@ -248,7 +254,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
248254
BasicBlock *BB = I++;
249255
bool ModifiedDTOnIteration = false;
250256
MadeChange |= OptimizeBlock(*BB, ModifiedDTOnIteration);
251-
257+
252258
// Restart BB iteration if the dominator tree of the Function was changed
253259
ModifiedDT |= ModifiedDTOnIteration;
254260
if (ModifiedDTOnIteration)
@@ -298,6 +304,16 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
298304
EverMadeChange |= MadeChange;
299305
}
300306

307+
if (!DisableGCOpts) {
308+
SmallVector<Instruction *, 2> Statepoints;
309+
for (BasicBlock &BB : F)
310+
for (Instruction &I : BB)
311+
if (isStatepoint(I))
312+
Statepoints.push_back(&I);
313+
for (auto &I : Statepoints)
314+
EverMadeChange |= simplifyOffsetableRelocate(*I);
315+
}
316+
301317
if (ModifiedDT && DT)
302318
DT->recalculate(F);
303319

@@ -521,6 +537,144 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
521537
DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
522538
}
523539

540+
// Computes a map of base pointer relocation instructions to corresponding
541+
// derived pointer relocation instructions given a vector of all relocate calls
542+
static void computeBaseDerivedRelocateMap(
543+
const SmallVectorImpl<User *> &AllRelocateCalls,
544+
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> &
545+
RelocateInstMap) {
546+
// Collect information in two maps: one primarily for locating the base object
547+
// while filling the second map; the second map is the final structure holding
548+
// a mapping between Base and corresponding Derived relocate calls
549+
DenseMap<std::pair<unsigned, unsigned>, IntrinsicInst *> RelocateIdxMap;
550+
for (auto &U : AllRelocateCalls) {
551+
GCRelocateOperands ThisRelocate(U);
552+
IntrinsicInst *I = cast<IntrinsicInst>(U);
553+
auto K = std::make_pair(ThisRelocate.basePtrIndex(),
554+
ThisRelocate.derivedPtrIndex());
555+
RelocateIdxMap.insert(std::make_pair(K, I));
556+
}
557+
for (auto &Item : RelocateIdxMap) {
558+
std::pair<unsigned, unsigned> Key = Item.first;
559+
if (Key.first == Key.second)
560+
// Base relocation: nothing to insert
561+
continue;
562+
563+
IntrinsicInst *I = Item.second;
564+
auto BaseKey = std::make_pair(Key.first, Key.first);
565+
IntrinsicInst *Base = RelocateIdxMap[BaseKey];
566+
if (!Base)
567+
// TODO: We might want to insert a new base object relocate and gep off
568+
// that, if there are enough derived object relocates.
569+
continue;
570+
RelocateInstMap[Base].push_back(I);
571+
}
572+
}
573+
574+
// Accepts a GEP and extracts the operands into a vector provided they're all
575+
// small integer constants
576+
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
577+
SmallVectorImpl<Value *> &OffsetV) {
578+
for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
579+
// Only accept small constant integer operands
580+
auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
581+
if (!Op || Op->getZExtValue() > 20)
582+
return false;
583+
}
584+
585+
for (unsigned i = 1; i < GEP->getNumOperands(); i++)
586+
OffsetV.push_back(GEP->getOperand(i));
587+
return true;
588+
}
589+
590+
// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
591+
// replace, computes a replacement, and affects it.
592+
static bool
593+
simplifyRelocatesOffABase(IntrinsicInst *RelocatedBase,
594+
const SmallVectorImpl<IntrinsicInst *> &Targets) {
595+
bool MadeChange = false;
596+
for (auto &ToReplace : Targets) {
597+
GCRelocateOperands MasterRelocate(RelocatedBase);
598+
GCRelocateOperands ThisRelocate(ToReplace);
599+
600+
assert(ThisRelocate.basePtrIndex() == MasterRelocate.basePtrIndex() &&
601+
"Not relocating a derived object of the original base object");
602+
if (ThisRelocate.basePtrIndex() == ThisRelocate.derivedPtrIndex()) {
603+
// A duplicate relocate call. TODO: coalesce duplicates.
604+
continue;
605+
}
606+
607+
Value *Base = ThisRelocate.basePtr();
608+
auto Derived = dyn_cast<GetElementPtrInst>(ThisRelocate.derivedPtr());
609+
if (!Derived || Derived->getPointerOperand() != Base)
610+
continue;
611+
612+
SmallVector<Value *, 2> OffsetV;
613+
if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
614+
continue;
615+
616+
// Create a Builder and replace the target callsite with a gep
617+
IRBuilder<> Builder(ToReplace);
618+
Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
619+
Value *Replacement =
620+
Builder.CreateGEP(RelocatedBase, makeArrayRef(OffsetV));
621+
Instruction *ReplacementInst = cast<Instruction>(Replacement);
622+
ReplacementInst->removeFromParent();
623+
ReplacementInst->insertAfter(RelocatedBase);
624+
Replacement->takeName(ToReplace);
625+
ToReplace->replaceAllUsesWith(Replacement);
626+
ToReplace->eraseFromParent();
627+
628+
MadeChange = true;
629+
}
630+
return MadeChange;
631+
}
632+
633+
// Turns this:
634+
//
635+
// %base = ...
636+
// %ptr = gep %base + 15
637+
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
638+
// %base' = relocate(%tok, i32 4, i32 4)
639+
// %ptr' = relocate(%tok, i32 4, i32 5)
640+
// %val = load %ptr'
641+
//
642+
// into this:
643+
//
644+
// %base = ...
645+
// %ptr = gep %base + 15
646+
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
647+
// %base' = gc.relocate(%tok, i32 4, i32 4)
648+
// %ptr' = gep %base' + 15
649+
// %val = load %ptr'
650+
bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
651+
bool MadeChange = false;
652+
SmallVector<User *, 2> AllRelocateCalls;
653+
654+
for (auto *U : I.users())
655+
if (isGCRelocate(dyn_cast<Instruction>(U)))
656+
// Collect all the relocate calls associated with a statepoint
657+
AllRelocateCalls.push_back(U);
658+
659+
// We need atleast one base pointer relocation + one derived pointer
660+
// relocation to mangle
661+
if (AllRelocateCalls.size() < 2)
662+
return false;
663+
664+
// RelocateInstMap is a mapping from the base relocate instruction to the
665+
// corresponding derived relocate instructions
666+
DenseMap<IntrinsicInst *, SmallVector<IntrinsicInst *, 2>> RelocateInstMap;
667+
computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
668+
if (RelocateInstMap.empty())
669+
return false;
670+
671+
for (auto &Item : RelocateInstMap)
672+
// Item.first is the RelocatedBase to offset against
673+
// Item.second is the vector of Targets to replace
674+
MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
675+
return MadeChange;
676+
}
677+
524678
/// SinkCast - Sink the specified cast instruction into its user blocks
525679
static bool SinkCast(CastInst *CI) {
526680
BasicBlock *DefBB = CI->getParent();
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
; RUN: opt -codegenprepare -S < %s | FileCheck %s
2+
3+
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
4+
target triple = "x86_64-pc-linux-gnu"
5+
6+
declare zeroext i1 @return_i1()
7+
8+
define i32 @test_sor_basic(i32* %base) {
9+
; CHECK: getelementptr i32* %base, i32 15
10+
; CHECK: getelementptr i32* %base-new, i32 15
11+
entry:
12+
%ptr = getelementptr i32* %base, i32 15
13+
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
14+
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
15+
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
16+
%ret = load i32* %ptr-new
17+
ret i32 %ret
18+
}
19+
20+
define i32 @test_sor_two_derived(i32* %base) {
21+
; CHECK: getelementptr i32* %base, i32 15
22+
; CHECK: getelementptr i32* %base, i32 12
23+
; CHECK: getelementptr i32* %base-new, i32 15
24+
; CHECK: getelementptr i32* %base-new, i32 12
25+
entry:
26+
%ptr = getelementptr i32* %base, i32 15
27+
%ptr2 = getelementptr i32* %base, i32 12
28+
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
29+
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
30+
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
31+
%ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
32+
%ret = load i32* %ptr-new
33+
ret i32 %ret
34+
}
35+
36+
define i32 @test_sor_ooo(i32* %base) {
37+
; CHECK: getelementptr i32* %base, i32 15
38+
; CHECK: getelementptr i32* %base-new, i32 15
39+
entry:
40+
%ptr = getelementptr i32* %base, i32 15
41+
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr)
42+
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
43+
%base-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 4)
44+
%ret = load i32* %ptr-new
45+
ret i32 %ret
46+
}
47+
48+
define i32 @test_sor_gep_smallint([3 x i32]* %base) {
49+
; CHECK: getelementptr [3 x i32]* %base, i32 0, i32 2
50+
; CHECK: getelementptr [3 x i32]* %base-new, i32 0, i32 2
51+
entry:
52+
%ptr = getelementptr [3 x i32]* %base, i32 0, i32 2
53+
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
54+
%base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
55+
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
56+
%ret = load i32* %ptr-new
57+
ret i32 %ret
58+
}
59+
60+
define i32 @test_sor_gep_largeint([3 x i32]* %base) {
61+
; CHECK: getelementptr [3 x i32]* %base, i32 0, i32 21
62+
; CHECK-NOT: getelementptr [3 x i32]* %base-new, i32 0, i32 21
63+
entry:
64+
%ptr = getelementptr [3 x i32]* %base, i32 0, i32 21
65+
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, [3 x i32]* %base, i32* %ptr)
66+
%base-new = call [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32 %tok, i32 4, i32 4)
67+
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
68+
%ret = load i32* %ptr-new
69+
ret i32 %ret
70+
}
71+
72+
define i32 @test_sor_noop(i32* %base) {
73+
; CHECK: getelementptr i32* %base, i32 15
74+
; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
75+
; CHECK: call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
76+
entry:
77+
%ptr = getelementptr i32* %base, i32 15
78+
%ptr2 = getelementptr i32* %base, i32 12
79+
%tok = call i32 (i1 ()*, i32, i32, ...)* @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()* @return_i1, i32 0, i32 0, i32 0, i32* %base, i32* %ptr, i32* %ptr2)
80+
%ptr-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 5)
81+
%ptr2-new = call i32* @llvm.experimental.gc.relocate.p0i32(i32 %tok, i32 4, i32 6)
82+
%ret = load i32* %ptr-new
83+
ret i32 %ret
84+
}
85+
86+
declare i32 @llvm.experimental.gc.statepoint.p0f_i1f(i1 ()*, i32, i32, ...)
87+
declare i32* @llvm.experimental.gc.relocate.p0i32(i32, i32, i32)
88+
declare [3 x i32]* @llvm.experimental.gc.relocate.p0a3i32(i32, i32, i32)

0 commit comments

Comments
 (0)