[BOLT] Drop high discrepancy profiles in matching (llvm#95156)

shawbyoung · aaupov · shawbyoung · commit 68fc8dffe466 · 2024-06-17T15:14:35.000-07:00
Summary: Functions with high discrepancy 
(measured by matched function blocks) 
can be ignored with an added command line 
argument for better performance.

Test Plan: Added 
stale-matching-min-matched-block.test

---------

Co-authored-by: Amir Ayupov &lt;aaupov@fb.com&gt;
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
@@ -802,6 +802,11 @@
 
   The maximum size of a function to consider for inference.
 
+- `--stale-matching-min-matched-block=<uint>`
+
+  Minimum percent of exact match block for a function to be considered for
+  profile inference.
+
 - `--stale-threshold=<uint>`
 
   Maximum percentage of stale functions to tolerate (default: 100)
@@ -1161,4 +1166,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
+  Print non-default options after command line parsing
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -51,6 +51,12 @@ cl::opt<bool>
                       cl::desc("Infer counts from stale profile data."),
                       cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));
 
+cl::opt<unsigned> StaleMatchingMinMatchedBlock(
+    "stale-matching-min-matched-block",
+    cl::desc("Percentage threshold of matched basic blocks at which stale "
+             "profile inference is executed."),
+    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
+
 cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",
     cl::desc("The maximum size of a function to consider for inference."),
@@ -391,10 +397,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(BinaryContext &BC,
-                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
-                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
+size_t matchWeightsByHashes(
+    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
+    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
   std::vector<FlowBlock *> Blocks;
@@ -500,6 +505,8 @@ void matchWeightsByHashes(BinaryContext &BC,
     Block.HasUnknownWeight = false;
     Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
   }
+
+  return MatchedBlocks.size();
 }
 
 /// The function finds all blocks that are (i) reachable from the Entry block
@@ -575,10 +582,16 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 /// Decide if stale profile matching can be applied for a given function.
 /// Currently we skip inference for (very) large instances and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
-bool canApplyInference(const FlowFunction &Func) {
+bool canApplyInference(const FlowFunction &Func,
+                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
+                       const uint64_t &MatchedBlocks) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
+  if (MatchedBlocks * 100 <
+      opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
+    return false;
+
   bool HasExitBlocks = llvm::any_of(
       Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
   if (!HasExitBlocks)
@@ -725,18 +738,21 @@ bool YAMLProfileReader::inferStaleProfile(
   const BinaryFunction::BasicBlockOrderType BlockOrder(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
+  // Tracks the number of matched blocks.
+
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
+  size_t MatchedBlocks =
+      matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
   preprocessUnreachableBlocks(Func);
 
   // Check if profile inference can be applied for the instance.
-  if (!canApplyInference(Func))
+  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
     return false;
 
   // Apply the profile inference algorithm.
diff --git a/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml
@@ -0,0 +1,57 @@
+---
+header:
+  profile-version: 1
+  binary-name:     'reader-yaml.test.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:            SolveCubic
+    fid:             6
+    hash:            0x0000000000000000
+    exec:            151
+    nblocks:         18
+    blocks:
+      - bid:             0
+        insns:           43
+        hash:            0x4600940a609c0000
+        exec:            151
+        succ:            [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ]
+      - bid:             1
+        insns:           7
+        hash:            0x167a1f084f130088
+        succ:            [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ]
+      - bid:             13
+        insns:           26
+        hash:            0xa8d50000f81902a7
+        succ:            [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ]
+      - bid:             3
+        insns:           9
+        hash:            0xc516000073dc00a0
+        succ:            [ { bid: 5, cnt: 151 } ]
+      - bid:             5
+        insns:           9
+        hash:            0x6446e1ea500111
+  - name:            usqrt
+    fid:             7
+    hash:            0x0000000000000000
+    exec:            20
+    nblocks:         6
+    blocks:
+      - bid:             0
+        insns:           4
+        hash:            0x0000000000000001
+        exec:            20
+        succ:            [ { bid: 1, cnt: 0 } ]
+      - bid:             1
+        insns:           9
+        hash:            0x0000000000000001
+        succ:            [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ]
+      - bid:             3
+        insns:           2
+        hash:            0x0000000000000001
+        succ:            [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ]
+...
diff --git a/bolt/test/X86/stale-matching-min-matched-block.test b/bolt/test/X86/stale-matching-min-matched-block.test
@@ -0,0 +1,10 @@
+## This script checks the stale-matching-min-matched-block flag.
+
+RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
+
+## Testing "usqrt"
+RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \
+RUN:   --infer-stale-profile=1 --stale-matching-min-matched-block=75 \
+RUN:   --profile-ignore-hash=1 --debug-only=bolt-prof 2>&1 | FileCheck %s
+
+CHECK:    BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192)