From 75135aaf19f22637b60b0e29255f0000029bdbf9 Mon Sep 17 00:00:00 2001
From: Zalathar <Zalathar@users.noreply.github.com>
Date: Fri, 21 Mar 2025 15:07:05 +1100
Subject: [PATCH 1/2] coverage: Extract module `mapgen::unused` for handling
 unused functions

---
 .../src/coverageinfo/mapgen.rs                | 129 +-----------------
 .../src/coverageinfo/mapgen/unused.rs         | 128 +++++++++++++++++
 2 files changed, 132 insertions(+), 125 deletions(-)
 create mode 100644 compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs

diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index 9a2473d6cf23d..23e068fafacf1 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -5,15 +5,11 @@ use rustc_abi::Align;
 use rustc_codegen_ssa::traits::{
     BaseTypeCodegenMethods, ConstCodegenMethods, StaticCodegenMethods,
 };
-use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
-use rustc_hir::def_id::{DefId, LocalDefId};
+use rustc_data_structures::fx::FxIndexMap;
 use rustc_index::IndexVec;
-use rustc_middle::mir;
-use rustc_middle::mir::mono::MonoItemPartitions;
-use rustc_middle::ty::{self, TyCtxt};
+use rustc_middle::ty::TyCtxt;
 use rustc_session::RemapFileNameExt;
 use rustc_session::config::RemapPathScopeComponents;
-use rustc_span::def_id::DefIdSet;
 use rustc_span::{SourceFile, StableSourceFileId};
 use tracing::debug;
 
@@ -24,6 +20,7 @@ use crate::llvm;
 
 mod covfun;
 mod spans;
+mod unused;
 
 /// Generates and exports the coverage map, which is embedded in special
 /// linker sections in the final binary.
@@ -76,7 +73,7 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
     // In a single designated CGU, also prepare covfun records for functions
     // in this crate that were instrumented for coverage, but are unused.
     if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
-        let mut unused_instances = gather_unused_function_instances(cx);
+        let mut unused_instances = unused::gather_unused_function_instances(cx);
         // Sort the unused instances by symbol name, for the same reason as the used ones.
         unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
         covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
@@ -249,121 +246,3 @@ fn generate_covmap_record<'ll>(cx: &CodegenCx<'ll, '_>, version: u32, filenames_
 
     cx.add_used_global(covmap_global);
 }
-
-/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
-/// But since we don't want unused functions to disappear from coverage reports, we also scan for
-/// functions that were instrumented but are not participating in codegen.
-///
-/// These unused functions don't need to be codegenned, but we do need to add them to the function
-/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
-/// We also end up adding their symbol names to a special global array that LLVM will include in
-/// its embedded coverage data.
-fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<ty::Instance<'tcx>> {
-    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
-
-    let tcx = cx.tcx;
-    let usage = prepare_usage_sets(tcx);
-
-    let is_unused_fn = |def_id: LocalDefId| -> bool {
-        // Usage sets expect `DefId`, so convert from `LocalDefId`.
-        let d: DefId = LocalDefId::to_def_id(def_id);
-        // To be potentially eligible for "unused function" mappings, a definition must:
-        // - Be eligible for coverage instrumentation
-        // - Not participate directly in codegen (or have lost all its coverage statements)
-        // - Not have any coverage statements inlined into codegenned functions
-        tcx.is_eligible_for_coverage(def_id)
-            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
-            && !usage.used_via_inlining.contains(&d)
-    };
-
-    // FIXME(#79651): Consider trying to filter out dummy instantiations of
-    // unused generic functions from library crates, because they can produce
-    // "unused instantiation" in coverage reports even when they are actually
-    // used by some downstream crate in the same binary.
-
-    tcx.mir_keys(())
-        .iter()
-        .copied()
-        .filter(|&def_id| is_unused_fn(def_id))
-        .map(|def_id| make_dummy_instance(tcx, def_id))
-        .collect::<Vec<_>>()
-}
-
-struct UsageSets<'tcx> {
-    all_mono_items: &'tcx DefIdSet,
-    used_via_inlining: FxHashSet<DefId>,
-    missing_own_coverage: FxHashSet<DefId>,
-}
-
-/// Prepare sets of definitions that are relevant to deciding whether something
-/// is an "unused function" for coverage purposes.
-fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
-    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
-        tcx.collect_and_partition_mono_items(());
-
-    // Obtain a MIR body for each function participating in codegen, via an
-    // arbitrary instance.
-    let mut def_ids_seen = FxHashSet::default();
-    let def_and_mir_for_all_mono_fns = codegen_units
-        .iter()
-        .flat_map(|cgu| cgu.items().keys())
-        .filter_map(|item| match item {
-            mir::mono::MonoItem::Fn(instance) => Some(instance),
-            mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
-        })
-        // We only need one arbitrary instance per definition.
-        .filter(move |instance| def_ids_seen.insert(instance.def_id()))
-        .map(|instance| {
-            // We don't care about the instance, just its underlying MIR.
-            let body = tcx.instance_mir(instance.def);
-            (instance.def_id(), body)
-        });
-
-    // Functions whose coverage statements were found inlined into other functions.
-    let mut used_via_inlining = FxHashSet::default();
-    // Functions that were instrumented, but had all of their coverage statements
-    // removed by later MIR transforms (e.g. UnreachablePropagation).
-    let mut missing_own_coverage = FxHashSet::default();
-
-    for (def_id, body) in def_and_mir_for_all_mono_fns {
-        let mut saw_own_coverage = false;
-
-        // Inspect every coverage statement in the function's MIR.
-        for stmt in body
-            .basic_blocks
-            .iter()
-            .flat_map(|block| &block.statements)
-            .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
-        {
-            if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
-                // This coverage statement was inlined from another function.
-                used_via_inlining.insert(inlined.def_id());
-            } else {
-                // Non-inlined coverage statements belong to the enclosing function.
-                saw_own_coverage = true;
-            }
-        }
-
-        if !saw_own_coverage && body.function_coverage_info.is_some() {
-            missing_own_coverage.insert(def_id);
-        }
-    }
-
-    UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
-}
-
-fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
-    let def_id = local_def_id.to_def_id();
-
-    // Make a dummy instance that fills in all generics with placeholders.
-    ty::Instance::new(
-        def_id,
-        ty::GenericArgs::for_item(tcx, def_id, |param, _| {
-            if let ty::GenericParamDefKind::Lifetime = param.kind {
-                tcx.lifetimes.re_erased.into()
-            } else {
-                tcx.mk_param_from_def(param)
-            }
-        }),
-    )
-}
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
new file mode 100644
index 0000000000000..e7c9075bc15d2
--- /dev/null
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
@@ -0,0 +1,128 @@
+use rustc_data_structures::fx::FxHashSet;
+use rustc_hir::def_id::{DefId, LocalDefId};
+use rustc_middle::mir;
+use rustc_middle::mir::mono::MonoItemPartitions;
+use rustc_middle::ty::{self, TyCtxt};
+use rustc_span::def_id::DefIdSet;
+
+use crate::common::CodegenCx;
+
+/// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
+/// But since we don't want unused functions to disappear from coverage reports, we also scan for
+/// functions that were instrumented but are not participating in codegen.
+///
+/// These unused functions don't need to be codegenned, but we do need to add them to the function
+/// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
+/// We also end up adding their symbol names to a special global array that LLVM will include in
+/// its embedded coverage data.
+pub(crate) fn gather_unused_function_instances<'tcx>(
+    cx: &CodegenCx<'_, 'tcx>,
+) -> Vec<ty::Instance<'tcx>> {
+    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
+
+    let tcx = cx.tcx;
+    let usage = prepare_usage_sets(tcx);
+
+    let is_unused_fn = |def_id: LocalDefId| -> bool {
+        // Usage sets expect `DefId`, so convert from `LocalDefId`.
+        let d: DefId = LocalDefId::to_def_id(def_id);
+        // To be potentially eligible for "unused function" mappings, a definition must:
+        // - Be eligible for coverage instrumentation
+        // - Not participate directly in codegen (or have lost all its coverage statements)
+        // - Not have any coverage statements inlined into codegenned functions
+        tcx.is_eligible_for_coverage(def_id)
+            && (!usage.all_mono_items.contains(&d) || usage.missing_own_coverage.contains(&d))
+            && !usage.used_via_inlining.contains(&d)
+    };
+
+    // FIXME(#79651): Consider trying to filter out dummy instantiations of
+    // unused generic functions from library crates, because they can produce
+    // "unused instantiation" in coverage reports even when they are actually
+    // used by some downstream crate in the same binary.
+
+    tcx.mir_keys(())
+        .iter()
+        .copied()
+        .filter(|&def_id| is_unused_fn(def_id))
+        .map(|def_id| make_dummy_instance(tcx, def_id))
+        .collect::<Vec<_>>()
+}
+
+struct UsageSets<'tcx> {
+    all_mono_items: &'tcx DefIdSet,
+    used_via_inlining: FxHashSet<DefId>,
+    missing_own_coverage: FxHashSet<DefId>,
+}
+
+/// Prepare sets of definitions that are relevant to deciding whether something
+/// is an "unused function" for coverage purposes.
+fn prepare_usage_sets<'tcx>(tcx: TyCtxt<'tcx>) -> UsageSets<'tcx> {
+    let MonoItemPartitions { all_mono_items, codegen_units, .. } =
+        tcx.collect_and_partition_mono_items(());
+
+    // Obtain a MIR body for each function participating in codegen, via an
+    // arbitrary instance.
+    let mut def_ids_seen = FxHashSet::default();
+    let def_and_mir_for_all_mono_fns = codegen_units
+        .iter()
+        .flat_map(|cgu| cgu.items().keys())
+        .filter_map(|item| match item {
+            mir::mono::MonoItem::Fn(instance) => Some(instance),
+            mir::mono::MonoItem::Static(_) | mir::mono::MonoItem::GlobalAsm(_) => None,
+        })
+        // We only need one arbitrary instance per definition.
+        .filter(move |instance| def_ids_seen.insert(instance.def_id()))
+        .map(|instance| {
+            // We don't care about the instance, just its underlying MIR.
+            let body = tcx.instance_mir(instance.def);
+            (instance.def_id(), body)
+        });
+
+    // Functions whose coverage statements were found inlined into other functions.
+    let mut used_via_inlining = FxHashSet::default();
+    // Functions that were instrumented, but had all of their coverage statements
+    // removed by later MIR transforms (e.g. UnreachablePropagation).
+    let mut missing_own_coverage = FxHashSet::default();
+
+    for (def_id, body) in def_and_mir_for_all_mono_fns {
+        let mut saw_own_coverage = false;
+
+        // Inspect every coverage statement in the function's MIR.
+        for stmt in body
+            .basic_blocks
+            .iter()
+            .flat_map(|block| &block.statements)
+            .filter(|stmt| matches!(stmt.kind, mir::StatementKind::Coverage(_)))
+        {
+            if let Some(inlined) = stmt.source_info.scope.inlined_instance(&body.source_scopes) {
+                // This coverage statement was inlined from another function.
+                used_via_inlining.insert(inlined.def_id());
+            } else {
+                // Non-inlined coverage statements belong to the enclosing function.
+                saw_own_coverage = true;
+            }
+        }
+
+        if !saw_own_coverage && body.function_coverage_info.is_some() {
+            missing_own_coverage.insert(def_id);
+        }
+    }
+
+    UsageSets { all_mono_items, used_via_inlining, missing_own_coverage }
+}
+
+fn make_dummy_instance<'tcx>(tcx: TyCtxt<'tcx>, local_def_id: LocalDefId) -> ty::Instance<'tcx> {
+    let def_id = local_def_id.to_def_id();
+
+    // Make a dummy instance that fills in all generics with placeholders.
+    ty::Instance::new(
+        def_id,
+        ty::GenericArgs::for_item(tcx, def_id, |param, _| {
+            if let ty::GenericParamDefKind::Lifetime = param.kind {
+                tcx.lifetimes.re_erased.into()
+            } else {
+                tcx.mk_param_from_def(param)
+            }
+        }),
+    )
+}

From b3c40cf374422ac8f6cbb14fa6747b2fae5762db Mon Sep 17 00:00:00 2001
From: Zalathar <Zalathar@users.noreply.github.com>
Date: Fri, 21 Mar 2025 15:14:22 +1100
Subject: [PATCH 2/2] coverage: Deal with unused functions and their names in
 one place

---
 .../src/coverageinfo/mapgen.rs                | 34 ++-----------
 .../src/coverageinfo/mapgen/covfun.rs         |  8 ----
 .../src/coverageinfo/mapgen/unused.rs         | 48 ++++++++++++++++++-
 3 files changed, 51 insertions(+), 39 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
index 23e068fafacf1..5e62ce285dd8c 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@@ -73,12 +73,11 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
     // In a single designated CGU, also prepare covfun records for functions
     // in this crate that were instrumented for coverage, but are unused.
     if cx.codegen_unit.is_code_coverage_dead_code_cgu() {
-        let mut unused_instances = unused::gather_unused_function_instances(cx);
-        // Sort the unused instances by symbol name, for the same reason as the used ones.
-        unused_instances.sort_by_cached_key(|&instance| tcx.symbol_name(instance).name);
-        covfun_records.extend(unused_instances.into_iter().filter_map(|instance| {
-            prepare_covfun_record(tcx, &mut global_file_table, instance, false)
-        }));
+        unused::prepare_covfun_records_for_unused_functions(
+            cx,
+            &mut global_file_table,
+            &mut covfun_records,
+        );
     }
 
     // If there are no covfun records for this CGU, don't generate a covmap record.
@@ -97,33 +96,10 @@ pub(crate) fn finalize(cx: &CodegenCx<'_, '_>) {
     // contain multiple covmap records from different compilation units.
     let filenames_hash = llvm_cov::hash_bytes(&filenames_buffer);
 
-    let mut unused_function_names = vec![];
-
     for covfun in &covfun_records {
-        unused_function_names.extend(covfun.mangled_function_name_if_unused());
-
         covfun::generate_covfun_record(cx, filenames_hash, covfun)
     }
 
-    // For unused functions, we need to take their mangled names and store them
-    // in a specially-named global array. LLVM's `InstrProfiling` pass will
-    // detect this global and include those names in its `__llvm_prf_names`
-    // section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
-    if !unused_function_names.is_empty() {
-        assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
-
-        let name_globals = unused_function_names
-            .into_iter()
-            .map(|mangled_function_name| cx.const_str(mangled_function_name).0)
-            .collect::<Vec<_>>();
-        let initializer = cx.const_array(cx.type_ptr(), &name_globals);
-
-        let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
-        llvm::set_global_constant(array, true);
-        llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
-        llvm::set_initializer(array, initializer);
-    }
-
     // Generate the coverage map header, which contains the filenames used by
     // this CGU's coverage mappings, and store it in a well-known global.
     // (This is skipped if we returned early due to having no covfun records.)
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
index 048e1988c3278..93419c2caad25 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/covfun.rs
@@ -37,14 +37,6 @@ pub(crate) struct CovfunRecord<'tcx> {
     regions: ffi::Regions,
 }
 
-impl<'tcx> CovfunRecord<'tcx> {
-    /// FIXME(Zalathar): Make this the responsibility of the code that determines
-    /// which functions are unused.
-    pub(crate) fn mangled_function_name_if_unused(&self) -> Option<&'tcx str> {
-        (!self.is_used).then_some(self.mangled_function_name)
-    }
-}
-
 pub(crate) fn prepare_covfun_record<'tcx>(
     tcx: TyCtxt<'tcx>,
     global_file_table: &mut GlobalFileTable,
diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
index e7c9075bc15d2..ab030f5b6152a 100644
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen/unused.rs
@@ -1,3 +1,4 @@
+use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, ConstCodegenMethods};
 use rustc_data_structures::fx::FxHashSet;
 use rustc_hir::def_id::{DefId, LocalDefId};
 use rustc_middle::mir;
@@ -6,6 +7,9 @@ use rustc_middle::ty::{self, TyCtxt};
 use rustc_span::def_id::DefIdSet;
 
 use crate::common::CodegenCx;
+use crate::coverageinfo::mapgen::GlobalFileTable;
+use crate::coverageinfo::mapgen::covfun::{CovfunRecord, prepare_covfun_record};
+use crate::llvm;
 
 /// Each CGU will normally only emit coverage metadata for the functions that it actually generates.
 /// But since we don't want unused functions to disappear from coverage reports, we also scan for
@@ -15,9 +19,48 @@ use crate::common::CodegenCx;
 /// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
 /// We also end up adding their symbol names to a special global array that LLVM will include in
 /// its embedded coverage data.
-pub(crate) fn gather_unused_function_instances<'tcx>(
+pub(crate) fn prepare_covfun_records_for_unused_functions<'tcx>(
     cx: &CodegenCx<'_, 'tcx>,
-) -> Vec<ty::Instance<'tcx>> {
+    global_file_table: &mut GlobalFileTable,
+    covfun_records: &mut Vec<CovfunRecord<'tcx>>,
+) {
+    assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
+
+    let mut unused_instances = gather_unused_function_instances(cx);
+    // Sort the unused instances by symbol name, so that their order isn't hash-sensitive.
+    unused_instances.sort_by_key(|instance| instance.symbol_name);
+
+    // Try to create a covfun record for each unused function.
+    let mut name_globals = Vec::with_capacity(unused_instances.len());
+    covfun_records.extend(unused_instances.into_iter().filter_map(|unused| try {
+        let record = prepare_covfun_record(cx.tcx, global_file_table, unused.instance, false)?;
+        // If successful, also store its symbol name in a global constant.
+        name_globals.push(cx.const_str(unused.symbol_name.name).0);
+        record
+    }));
+
+    // Store the names of unused functions in a specially-named global array.
+    // LLVM's `InstrProfilling` pass will detect this array, and include the
+    // referenced names in its `__llvm_prf_names` section.
+    // (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
+    if !name_globals.is_empty() {
+        let initializer = cx.const_array(cx.type_ptr(), &name_globals);
+
+        let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), c"__llvm_coverage_names");
+        llvm::set_global_constant(array, true);
+        llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
+        llvm::set_initializer(array, initializer);
+    }
+}
+
+/// Holds a dummy function instance along with its symbol name, to avoid having
+/// to repeatedly query for the name.
+struct UnusedInstance<'tcx> {
+    instance: ty::Instance<'tcx>,
+    symbol_name: ty::SymbolName<'tcx>,
+}
+
+fn gather_unused_function_instances<'tcx>(cx: &CodegenCx<'_, 'tcx>) -> Vec<UnusedInstance<'tcx>> {
     assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
 
     let tcx = cx.tcx;
@@ -45,6 +88,7 @@ pub(crate) fn gather_unused_function_instances<'tcx>(
         .copied()
         .filter(|&def_id| is_unused_fn(def_id))
         .map(|def_id| make_dummy_instance(tcx, def_id))
+        .map(|instance| UnusedInstance { instance, symbol_name: tcx.symbol_name(instance) })
         .collect::<Vec<_>>()
 }