Skip to content

Commit 68fc8df

Browse files
shawbyoungaaupov
andcommitted
[BOLT] Drop high discrepancy profiles in matching (llvm#95156)
Summary: Functions whose profiles have a high discrepancy (measured by the percentage of matched basic blocks) can now be excluded from stale profile inference via a new command-line argument, for better performance. Test Plan: Added stale-matching-min-matched-block.test --------- Co-authored-by: Amir Ayupov <[email protected]>
1 parent 6c17f1c commit 68fc8df

File tree

4 files changed

+96
-8
lines changed

4 files changed

+96
-8
lines changed

bolt/docs/CommandLineArgumentReference.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,11 @@
802802

803803
The maximum size of a function to consider for inference.
804804

805+
- `--stale-matching-min-matched-block=<uint>`
806+
807+
Minimum percentage of exactly matched blocks for a function to be considered for
808+
profile inference.
809+
805810
- `--stale-threshold=<uint>`
806811

807812
Maximum percentage of stale functions to tolerate (default: 100)
@@ -1161,4 +1166,4 @@
11611166

11621167
- `--print-options`
11631168

1164-
Print non-default options after command line parsing
1169+
Print non-default options after command line parsing

bolt/lib/Profile/StaleProfileMatching.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,12 @@ cl::opt<bool>
5151
cl::desc("Infer counts from stale profile data."),
5252
cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
5353

54+
cl::opt<unsigned> StaleMatchingMinMatchedBlock(
55+
"stale-matching-min-matched-block",
56+
cl::desc("Percentage threshold of matched basic blocks at which stale "
57+
"profile inference is executed."),
58+
cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
59+
5460
cl::opt<unsigned> StaleMatchingMaxFuncSize(
5561
"stale-matching-max-func-size",
5662
cl::desc("The maximum size of a function to consider for inference."),
@@ -391,10 +397,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
391397
/// of the basic blocks in the binary, the count is "matched" to the block.
392398
/// Similarly, if both the source and the target of a count in the profile are
393399
/// matched to a jump in the binary, the count is recorded in CFG.
394-
void matchWeightsByHashes(BinaryContext &BC,
395-
const BinaryFunction::BasicBlockOrderType &BlockOrder,
396-
const yaml::bolt::BinaryFunctionProfile &YamlBF,
397-
FlowFunction &Func) {
400+
size_t matchWeightsByHashes(
401+
BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
402+
const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
398403
assert(Func.Blocks.size() == BlockOrder.size() + 1);
399404

400405
std::vector<FlowBlock *> Blocks;
@@ -500,6 +505,8 @@ void matchWeightsByHashes(BinaryContext &BC,
500505
Block.HasUnknownWeight = false;
501506
Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
502507
}
508+
509+
return MatchedBlocks.size();
503510
}
504511

505512
/// The function finds all blocks that are (i) reachable from the Entry block
@@ -575,10 +582,16 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
575582
/// Decide if stale profile matching can be applied for a given function.
576583
/// Currently we skip inference for (very) large instances and for instances
577584
/// having "unexpected" control flow (e.g., having no sink basic blocks).
578-
bool canApplyInference(const FlowFunction &Func) {
585+
bool canApplyInference(const FlowFunction &Func,
586+
const yaml::bolt::BinaryFunctionProfile &YamlBF,
587+
const uint64_t &MatchedBlocks) {
579588
if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
580589
return false;
581590

591+
if (MatchedBlocks * 100 <
592+
opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
593+
return false;
594+
582595
bool HasExitBlocks = llvm::any_of(
583596
Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
584597
if (!HasExitBlocks)
@@ -725,18 +738,21 @@ bool YAMLProfileReader::inferStaleProfile(
725738
const BinaryFunction::BasicBlockOrderType BlockOrder(
726739
BF.getLayout().block_begin(), BF.getLayout().block_end());
727740

741+
// Tracks the number of matched blocks.
742+
728743
// Create a wrapper flow function to use with the profile inference algorithm.
729744
FlowFunction Func = createFlowFunction(BlockOrder);
730745

731746
// Match as many block/jump counts from the stale profile as possible
732-
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
747+
size_t MatchedBlocks =
748+
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
733749

734750
// Adjust the flow function by marking unreachable blocks Unlikely so that
735751
// they don't get any counts assigned.
736752
preprocessUnreachableBlocks(Func);
737753

738754
// Check if profile inference can be applied for the instance.
739-
if (!canApplyInference(Func))
755+
if (!canApplyInference(Func, YamlBF, MatchedBlocks))
740756
return false;
741757

742758
// Apply the profile inference algorithm.
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
---
2+
header:
3+
profile-version: 1
4+
binary-name: 'reader-yaml.test.tmp.exe'
5+
binary-build-id: '<unknown>'
6+
profile-flags: [ lbr ]
7+
profile-origin: branch profile reader
8+
profile-events: ''
9+
dfs-order: false
10+
hash-func: xxh3
11+
functions:
12+
- name: SolveCubic
13+
fid: 6
14+
hash: 0x0000000000000000
15+
exec: 151
16+
nblocks: 18
17+
blocks:
18+
- bid: 0
19+
insns: 43
20+
hash: 0x4600940a609c0000
21+
exec: 151
22+
succ: [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ]
23+
- bid: 1
24+
insns: 7
25+
hash: 0x167a1f084f130088
26+
succ: [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ]
27+
- bid: 13
28+
insns: 26
29+
hash: 0xa8d50000f81902a7
30+
succ: [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ]
31+
- bid: 3
32+
insns: 9
33+
hash: 0xc516000073dc00a0
34+
succ: [ { bid: 5, cnt: 151 } ]
35+
- bid: 5
36+
insns: 9
37+
hash: 0x6446e1ea500111
38+
- name: usqrt
39+
fid: 7
40+
hash: 0x0000000000000000
41+
exec: 20
42+
nblocks: 6
43+
blocks:
44+
- bid: 0
45+
insns: 4
46+
hash: 0x0000000000000001
47+
exec: 20
48+
succ: [ { bid: 1, cnt: 0 } ]
49+
- bid: 1
50+
insns: 9
51+
hash: 0x0000000000000001
52+
succ: [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ]
53+
- bid: 3
54+
insns: 2
55+
hash: 0x0000000000000001
56+
succ: [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ]
57+
...
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
## This script checks the stale-matching-min-matched-block flag.
2+
3+
RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
4+
5+
## Testing "usqrt"
6+
RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \
7+
RUN: --infer-stale-profile=1 --stale-matching-min-matched-block=75 \
8+
RUN: --profile-ignore-hash=1 --debug-only=bolt-prof 2>&1 | FileCheck %s
9+
10+
CHECK: BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192)

0 commit comments

Comments
 (0)