Skip to content

Commit ac8a9f8

Browse files
[memprof] Undrift MemProfRecord (#120138)
This patch undrifts source locations in MemProfRecord before readMemprof starts the matching process. The theory of operation is as follows: 1. Collect the lists of direct calls, one from the IR and the other from the profile. 2. Compute the correspondence (called undrift map in the patch) between the two lists with longestCommonSequence. 3. Apply the undrift map just before readMemprof consumes MemProfRecord. The new function is gated by a flag that is off by default.
1 parent d8399d5 commit ac8a9f8

File tree

2 files changed

+220
-2
lines changed

2 files changed

+220
-2
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,11 @@ static cl::opt<std::string>
171171
cl::desc("The default memprof options"),
172172
cl::Hidden, cl::init(""));
173173

174+
// Hidden command-line flag "-memprof-salvage-stale-profile"; defaults to
// false. In this file it gates both the computation of the undrift maps and
// their application to each MemProfRecord.
static cl::opt<bool>
175+
SalvageStaleProfile("memprof-salvage-stale-profile",
176+
cl::desc("Salvage stale MemProf profile"),
177+
cl::init(false), cl::Hidden);
178+
174179
extern cl::opt<bool> MemProfReportHintedSizes;
175180

176181
static cl::opt<unsigned> MinMatchedColdBytePercent(
@@ -911,10 +916,38 @@ memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,
911916
return UndriftMaps;
912917
}
913918

919+
// Given a MemProfRecord, undrift all the source locations present in the
920+
// record in place.
//
// UndriftMaps is looked up by a frame's Function value; the mapped value is
// then looked up by LineLocation(LineOffset, Column) and yields the
// replacement LineOffset/Column for that frame. Frames with no entry at
// either level are left unchanged. Every call stack in
// MemProfRec.AllocSites and every entry of MemProfRec.CallSites is processed.
921+
static void
922+
undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,
923+
memprof::MemProfRecord &MemProfRec) {
924+
// Undrift a call stack in place.
925+
auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {
926+
for (auto &F : CallStack) {
927+
// First level: find the location map for this frame's function.
auto I = UndriftMaps.find(F.Function);
928+
if (I == UndriftMaps.end())
929+
// No map for this function; keep the frame as recorded.
continue;
930+
// Second level: find the replacement for this (line offset, column).
auto J = I->second.find(LineLocation(F.LineOffset, F.Column));
931+
if (J == I->second.end())
932+
// This location has no mapping; keep the frame as recorded.
continue;
933+
auto &NewLoc = J->second;
934+
// Overwrite only the location fields; other Frame fields are untouched.
F.LineOffset = NewLoc.LineOffset;
935+
F.Column = NewLoc.Column;
936+
}
937+
};
938+
939+
// Allocation sites carry their frames in the CallStack member.
for (auto &AS : MemProfRec.AllocSites)
940+
UndriftCallStack(AS.CallStack);
941+
942+
// Each CallSites entry is passed directly as a call stack.
for (auto &CS : MemProfRec.CallSites)
943+
UndriftCallStack(CS);
944+
}
945+
914946
static void
915947
readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
916948
const TargetLibraryInfo &TLI,
917-
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo) {
949+
std::map<uint64_t, AllocMatchInfo> &FullStackIdToAllocMatchInfo,
950+
DenseMap<uint64_t, LocToLocMap> &UndriftMaps) {
918951
auto &Ctx = M.getContext();
919952
// Previously we used getIRPGOFuncName() here. If F is local linkage,
920953
// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But
@@ -962,6 +995,11 @@ readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
962995

963996
NumOfMemProfFunc++;
964997

998+
// If requested, undrift MemProfRecord so that the source locations in it
999+
// match those in the IR.
1000+
if (SalvageStaleProfile)
1001+
undriftMemProfRecord(UndriftMaps, *MemProfRec);
1002+
9651003
// Detect if there are non-zero column numbers in the profile. If not,
9661004
// treat all column numbers as 0 when matching (i.e. ignore any non-zero
9671005
// columns in the IR). The profiled binary might have been built with
@@ -1195,6 +1233,11 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
11951233

11961234
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
11971235

1236+
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());
1237+
DenseMap<uint64_t, LocToLocMap> UndriftMaps;
1238+
if (SalvageStaleProfile)
1239+
UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);
1240+
11981241
// Map from the stack hash of each allocation context in the function profiles
11991242
// to the total profiled size (bytes), allocation type, and whether we matched
12001243
// it to an allocation in the IR.
@@ -1205,7 +1248,8 @@ PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
12051248
continue;
12061249

12071250
const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1208-
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);
1251+
readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,
1252+
UndriftMaps);
12091253
}
12101254

12111255
if (ClPrintMemProfMatchInfo) {
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
; REQUIRES: x86_64-linux
2+
3+
; Make sure that we can undrift the MemProf profile and annotate the IR
4+
; accordingly.
5+
;
6+
; The IR was generated from:
7+
;
8+
; char *foo() { return ::new char[4]; }
9+
; char *leaf() { return ::new char[4]; }
10+
; char *middle() { return leaf(); }
11+
; char *aaa() { return middle(); }
12+
; char *bbb() { return middle(); }
13+
;
14+
; int main() {
15+
; foo();
16+
;
17+
; char *a = aaa();
18+
; char *b = bbb();
19+
; a[0] = 'a';
20+
; b[0] = 'b';
21+
; delete[] a;
22+
; sleep(10);
23+
; delete[] b;
24+
;
25+
; return 0;
26+
; }
27+
28+
; RUN: split-file %s %t
29+
; RUN: llvm-profdata merge %t/memprof_undrift.yaml -o %t/memprof_undrift.memprofdata
30+
; RUN: opt < %t/memprof_undrift.ll -passes='memprof-use<profile-filename=%t/memprof_undrift.memprofdata>' -memprof-salvage-stale-profile -memprof-ave-lifetime-cold-threshold=5 -S 2>&1 | FileCheck %s
31+
32+
;--- memprof_undrift.yaml
33+
---
34+
HeapProfileRecords:
35+
- GUID: _Z3aaav
36+
AllocSites: []
37+
CallSites:
38+
- - { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
39+
- GUID: _Z6middlev
40+
AllocSites: []
41+
CallSites:
42+
- - { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
43+
- GUID: _Z3foov
44+
AllocSites:
45+
- Callstack:
46+
- { Function: _Z3foov, LineOffset: 5, Column: 33, IsInlineFrame: false }
47+
- { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false }
48+
MemInfoBlock:
49+
AllocCount: 1
50+
TotalSize: 4
51+
TotalLifetime: 10000
52+
TotalLifetimeAccessDensity: 0
53+
CallSites: []
54+
- GUID: _Z4leafv
55+
AllocSites:
56+
- Callstack:
57+
- { Function: _Z4leafv, LineOffset: 5, Column: 33, IsInlineFrame: false }
58+
- { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
59+
- { Function: _Z3aaav, LineOffset: 5, Column: 33, IsInlineFrame: false }
60+
- { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false }
61+
MemInfoBlock:
62+
AllocCount: 1
63+
TotalSize: 4
64+
TotalLifetime: 0
65+
TotalLifetimeAccessDensity: 25000
66+
- Callstack:
67+
- { Function: _Z4leafv, LineOffset: 5, Column: 33, IsInlineFrame: false }
68+
- { Function: _Z6middlev, LineOffset: 5, Column: 33, IsInlineFrame: false }
69+
- { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
70+
- { Function: main, LineOffset: 5, Column: 33, IsInlineFrame: false }
71+
MemInfoBlock:
72+
AllocCount: 1
73+
TotalSize: 4
74+
TotalLifetime: 10000
75+
TotalLifetimeAccessDensity: 2
76+
CallSites: []
77+
- GUID: _Z3bbbv
78+
AllocSites: []
79+
CallSites:
80+
- - { Function: _Z3bbbv, LineOffset: 5, Column: 33, IsInlineFrame: false }
81+
...
82+
;--- memprof_undrift.ll
83+
define dso_local ptr @_Z3foov() !dbg !5 {
84+
; CHECK-LABEL: @_Z3foov()
85+
entry:
86+
%call = call ptr @_Znam(i64 4) #1, !dbg !8
87+
; CHECK: call ptr @_Znam(i64 4) #[[ATTR:[0-9]+]]
88+
ret ptr %call, !dbg !9
89+
}
90+
91+
; Function Attrs: nobuiltin allocsize(0)
92+
declare ptr @_Znam(i64 noundef) #0
93+
94+
define dso_local ptr @_Z4leafv() !dbg !10 {
95+
; CHECK-LABEL: @_Z4leafv()
96+
entry:
97+
%call = call ptr @_Znam(i64 4) #1, !dbg !11
98+
; CHECK: call ptr @_Znam(i64 4) {{.*}}, !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
99+
ret ptr %call, !dbg !12
100+
}
101+
102+
define dso_local ptr @_Z6middlev() !dbg !13 {
103+
; CHECK-LABEL: @_Z6middlev()
104+
entry:
105+
%call.i = call ptr @_Znam(i64 4) #1, !dbg !14
106+
; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C2:[0-9]+]]
107+
ret ptr %call.i, !dbg !16
108+
}
109+
110+
define dso_local ptr @_Z3aaav() !dbg !17 {
111+
; CHECK-LABEL: @_Z3aaav()
112+
entry:
113+
%call.i.i = call ptr @_Znam(i64 4) #1, !dbg !18
114+
; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C3:[0-9]+]]
115+
ret ptr %call.i.i, !dbg !21
116+
}
117+
118+
define dso_local ptr @_Z3bbbv() !dbg !22 {
119+
; CHECK-LABEL: @_Z3bbbv()
120+
entry:
121+
%call.i.i = call ptr @_Znam(i64 4) #1, !dbg !23
122+
; CHECK: call ptr @_Znam(i64 4) {{.*}}, !callsite ![[C4:[0-9]+]]
123+
ret ptr %call.i.i, !dbg !26
124+
}
125+
126+
attributes #0 = { nobuiltin allocsize(0) }
127+
attributes #1 = { builtin allocsize(0) }
128+
129+
!llvm.dbg.cu = !{!0}
130+
!llvm.module.flags = !{!2, !3}
131+
132+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1)
133+
!1 = !DIFile(filename: "undrift.cc", directory: "/")
134+
!2 = !{i32 7, !"Dwarf Version", i32 5}
135+
!3 = !{i32 2, !"Debug Info Version", i32 3}
136+
!4 = !{}
137+
!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 56, type: !6, unit: !0)
138+
!6 = !DISubroutineType(types: !7)
139+
!7 = !{}
140+
!8 = !DILocation(line: 56, column: 22, scope: !5)
141+
!9 = !DILocation(line: 56, column: 15, scope: !5)
142+
!10 = distinct !DISubprogram(name: "leaf", linkageName: "_Z4leafv", scope: !1, file: !1, line: 58, type: !6, unit: !0)
143+
!11 = !DILocation(line: 58, column: 23, scope: !10)
144+
!12 = !DILocation(line: 58, column: 16, scope: !10)
145+
!13 = distinct !DISubprogram(name: "middle", linkageName: "_Z6middlev", scope: !1, file: !1, line: 59, type: !6, unit: !0)
146+
!14 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !15)
147+
!15 = distinct !DILocation(line: 59, column: 25, scope: !13)
148+
!16 = !DILocation(line: 59, column: 18, scope: !13)
149+
!17 = distinct !DISubprogram(name: "aaa", linkageName: "_Z3aaav", scope: !1, file: !1, line: 61, type: !6, unit: !0)
150+
!18 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !19)
151+
!19 = distinct !DILocation(line: 59, column: 25, scope: !13, inlinedAt: !20)
152+
!20 = distinct !DILocation(line: 61, column: 22, scope: !17)
153+
!21 = !DILocation(line: 61, column: 15, scope: !17)
154+
!22 = distinct !DISubprogram(name: "bbb", linkageName: "_Z3bbbv", scope: !1, file: !1, line: 62, type: !6, unit: !0)
155+
!23 = !DILocation(line: 58, column: 23, scope: !10, inlinedAt: !24)
156+
!24 = distinct !DILocation(line: 59, column: 25, scope: !13, inlinedAt: !25)
157+
!25 = distinct !DILocation(line: 62, column: 22, scope: !22)
158+
!26 = !DILocation(line: 62, column: 15, scope: !22)
159+
160+
; CHECK: attributes #[[ATTR]] = { builtin allocsize(0) "memprof"="cold" }
161+
162+
; CHECK: ![[M1]] = !{![[M1L:[0-9]+]], ![[M1R:[0-9]+]]}
163+
; CHECK: ![[M1L]] = !{![[M1LL:[0-9]+]], !"cold"}
164+
; CHECK: ![[M1LL]] = !{i64 -7165227774426488445, i64 6179674587295384169, i64 7749555980993309703}
165+
; CHECK: ![[M1R]] = !{![[M1RL:[0-9]+]], !"notcold"}
166+
; CHECK: ![[M1RL]] = !{i64 -7165227774426488445, i64 6179674587295384169, i64 -4748707735015301746}
167+
168+
; CHECK: ![[C1]] = !{i64 -7165227774426488445}
169+
170+
; CHECK: ![[C2]] = !{i64 6179674587295384169}
171+
172+
; CHECK: ![[C3]] = !{i64 -4748707735015301746}
173+
174+
; CHECK: ![[C4]] = !{i64 7749555980993309703}

0 commit comments

Comments
 (0)