diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
index ac5aaea561cca..24cf3f061a567 100644
--- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs
+++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs
@@ -10,12 +10,12 @@ use std::mem;
 use cranelift_codegen::ir::{ArgumentPurpose, SigRef};
 use cranelift_codegen::isa::CallConv;
 use cranelift_module::ModuleError;
+use rustc_codegen_ssa::base::is_call_from_compiler_builtins_to_upstream_monomorphization;
 use rustc_codegen_ssa::errors::CompilerBuiltinsCannotCall;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::ty::layout::FnAbiOf;
 use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_middle::ty::TypeVisitableExt;
-use rustc_monomorphize::is_call_from_compiler_builtins_to_upstream_monomorphization;
 use rustc_session::Session;
 use rustc_span::source_map::Spanned;
 use rustc_target::abi::call::{Conv, FnAbi, PassMode};
diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs
index 5adbbb09ac85f..9bc7b57c53745 100644
--- a/compiler/rustc_codegen_cranelift/src/base.rs
+++ b/compiler/rustc_codegen_cranelift/src/base.rs
@@ -5,13 +5,13 @@ use cranelift_codegen::CodegenError;
 use cranelift_frontend::{FunctionBuilder, FunctionBuilderContext};
 use cranelift_module::ModuleError;
 use rustc_ast::InlineAsmOptions;
+use rustc_codegen_ssa::base::is_call_from_compiler_builtins_to_upstream_monomorphization;
 use rustc_index::IndexVec;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
 use rustc_middle::ty::adjustment::PointerCoercion;
 use rustc_middle::ty::layout::FnAbiOf;
 use rustc_middle::ty::print::with_no_trimmed_paths;
 use rustc_middle::ty::TypeVisitableExt;
-use rustc_monomorphize::is_call_from_compiler_builtins_to_upstream_monomorphization;
 
 use crate::constant::ConstantCx;
 use crate::debuginfo::{FunctionDebugContext, TypeDebugContext};
diff --git a/compiler/rustc_codegen_cranelift/src/lib.rs b/compiler/rustc_codegen_cranelift/src/lib.rs
index 192e6c91ea38b..8d3d5ac98e1e5 100644
--- a/compiler/rustc_codegen_cranelift/src/lib.rs
+++ b/compiler/rustc_codegen_cranelift/src/lib.rs
@@ -24,7 +24,6 @@ extern crate rustc_hir;
 extern crate rustc_incremental;
 extern crate rustc_index;
 extern crate rustc_metadata;
-extern crate rustc_monomorphize;
 extern crate rustc_session;
 extern crate rustc_span;
 extern crate rustc_target;
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 137f14fe706cc..399ac4858505e 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -806,6 +806,34 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
     ongoing_codegen
 }
 
+/// Returns whether a call from the current crate to the [`Instance`] would produce a call
+/// from `compiler_builtins` to a symbol the linker must resolve.
+///
+/// Such calls from `compiler_bultins` are effectively impossible for the linker to handle. Some
+/// linkers will optimize such that dead calls to unresolved symbols are not an error, but this is
+/// not guaranteed. So we used this function in codegen backends to ensure we do not generate any
+/// unlinkable calls.
+///
+/// Note that calls to LLVM intrinsics are uniquely okay because they won't make it to the linker.
+pub fn is_call_from_compiler_builtins_to_upstream_monomorphization<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    instance: Instance<'tcx>,
+) -> bool {
+    fn is_llvm_intrinsic(tcx: TyCtxt<'_>, def_id: DefId) -> bool {
+        if let Some(name) = tcx.codegen_fn_attrs(def_id).link_name {
+            name.as_str().starts_with("llvm.")
+        } else {
+            false
+        }
+    }
+
+    let def_id = instance.def_id();
+    !def_id.is_local()
+        && tcx.is_compiler_builtins(LOCAL_CRATE)
+        && !is_llvm_intrinsic(tcx, def_id)
+        && !tcx.should_codegen_locally(instance)
+}
+
 impl CrateInfo {
     pub fn new(tcx: TyCtxt<'_>, target_cpu: String) -> CrateInfo {
         let crate_types = tcx.crate_types().to_vec();
diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs
index 6a5525dc2b34f..c9c8f02c491bd 100644
--- a/compiler/rustc_codegen_ssa/src/mir/block.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/block.rs
@@ -3,7 +3,7 @@ use super::operand::OperandValue::{Immediate, Pair, Ref, ZeroSized};
 use super::place::{PlaceRef, PlaceValue};
 use super::{CachedLlbb, FunctionCx, LocalRef};
 
-use crate::base;
+use crate::base::{self, is_call_from_compiler_builtins_to_upstream_monomorphization};
 use crate::common::{self, IntPredicate};
 use crate::errors::CompilerBuiltinsCannotCall;
 use crate::meth;
@@ -18,7 +18,6 @@ use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf, ValidityRequirement};
 use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
 use rustc_middle::ty::{self, Instance, Ty};
 use rustc_middle::{bug, span_bug};
-use rustc_monomorphize::is_call_from_compiler_builtins_to_upstream_monomorphization;
 use rustc_session::config::OptLevel;
 use rustc_span::{source_map::Spanned, sym, Span};
 use rustc_target::abi::call::{ArgAbi, FnAbi, PassMode, Reg};
diff --git a/compiler/rustc_middle/src/hooks/mod.rs b/compiler/rustc_middle/src/hooks/mod.rs
index bf10a71dbaec1..75dc685a16aee 100644
--- a/compiler/rustc_middle/src/hooks/mod.rs
+++ b/compiler/rustc_middle/src/hooks/mod.rs
@@ -103,6 +103,10 @@ declare_hooks! {
 
     /// Create a list-like THIR representation for debugging.
     hook thir_flat(key: LocalDefId) -> String;
+
+    /// Returns `true` if we should codegen an instance in the local crate, or returns `false` if we
+    /// can just link to the upstream crate and therefore don't need a mono item.
+    hook should_codegen_locally(instance: crate::ty::Instance<'tcx>) -> bool;
 }
 
 #[cold]
diff --git a/compiler/rustc_mir_build/src/build/matches/mod.rs b/compiler/rustc_mir_build/src/build/matches/mod.rs
index 7f4a2e73d338d..95bc8b3d0cbc4 100644
--- a/compiler/rustc_mir_build/src/build/matches/mod.rs
+++ b/compiler/rustc_mir_build/src/build/matches/mod.rs
@@ -1598,6 +1598,9 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
                 for subcandidate in candidate.subcandidates.iter_mut() {
                     expanded_candidates.push(subcandidate);
                 }
+                // Note that the subcandidates have been added to `expanded_candidates`,
+                // but `candidate` itself has not. If the last candidate has more match pairs,
+                // they are handled separately by `test_remaining_match_pairs_after_or`.
             } else {
                 // A candidate that doesn't start with an or-pattern has nothing to
                 // expand, so it is included in the post-expansion list as-is.
@@ -1613,19 +1616,28 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
             expanded_candidates.as_mut_slice(),
         );
 
-        // Simplify subcandidates and process any leftover match pairs.
-        for candidate in candidates_to_expand {
+        // Postprocess subcandidates, and process any leftover match pairs.
+        // (Only the last candidate can possibly have more match pairs.)
+        debug_assert!({
+            let mut all_except_last = candidates_to_expand.iter().rev().skip(1);
+            all_except_last.all(|candidate| candidate.match_pairs.is_empty())
+        });
+        for candidate in candidates_to_expand.iter_mut() {
             if !candidate.subcandidates.is_empty() {
-                self.finalize_or_candidate(span, scrutinee_span, candidate);
+                self.merge_trivial_subcandidates(candidate);
+                self.remove_never_subcandidates(candidate);
             }
         }
+        if let Some(last_candidate) = candidates_to_expand.last_mut() {
+            self.test_remaining_match_pairs_after_or(span, scrutinee_span, last_candidate);
+        }
 
         remainder_start.and(remaining_candidates)
     }
 
     /// Given a match-pair that corresponds to an or-pattern, expand each subpattern into a new
-    /// subcandidate. Any candidate that has been expanded that way should be passed to
-    /// `finalize_or_candidate` after its subcandidates have been processed.
+    /// subcandidate. Any candidate that has been expanded this way should also be postprocessed
+    /// at the end of [`Self::expand_and_match_or_candidates`].
     fn create_or_subcandidates<'pat>(
         &mut self,
         candidate: &mut Candidate<'pat, 'tcx>,
@@ -1642,7 +1654,8 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         candidate.subcandidates[0].false_edge_start_block = candidate.false_edge_start_block;
     }
 
-    /// Simplify subcandidates and process any leftover match pairs. The candidate should have been
+    /// Try to merge all of the subcandidates of the given candidate into one. This avoids
+    /// exponentially large CFGs in cases like `(1 | 2, 3 | 4, ...)`. The candidate should have been
     /// expanded with `create_or_subcandidates`.
     ///
     /// Given a pattern `(P | Q, R | S)` we (in principle) generate a CFG like
@@ -1695,103 +1708,128 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
     ///      |
     ///     ...
     /// ```
-    fn finalize_or_candidate(
-        &mut self,
-        span: Span,
-        scrutinee_span: Span,
-        candidate: &mut Candidate<'_, 'tcx>,
-    ) {
-        if candidate.subcandidates.is_empty() {
+    ///
+    /// Note that this takes place _after_ the subcandidates have participated
+    /// in match tree lowering.
+    fn merge_trivial_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) {
+        assert!(!candidate.subcandidates.is_empty());
+        if candidate.has_guard {
+            // FIXME(or_patterns; matthewjasper) Don't give up if we have a guard.
             return;
         }
 
-        self.merge_trivial_subcandidates(candidate);
+        // FIXME(or_patterns; matthewjasper) Try to be more aggressive here.
+        let can_merge = candidate.subcandidates.iter().all(|subcandidate| {
+            subcandidate.subcandidates.is_empty() && subcandidate.extra_data.is_empty()
+        });
+        if !can_merge {
+            return;
+        }
 
-        if !candidate.match_pairs.is_empty() {
-            let or_span = candidate.or_span.unwrap_or(candidate.extra_data.span);
-            let source_info = self.source_info(or_span);
-            // If more match pairs remain, test them after each subcandidate.
-            // We could add them to the or-candidates before the call to `test_or_pattern` but this
-            // would make it impossible to detect simplifiable or-patterns. That would guarantee
-            // exponentially large CFGs for cases like `(1 | 2, 3 | 4, ...)`.
-            let mut last_otherwise = None;
-            candidate.visit_leaves(|leaf_candidate| {
-                last_otherwise = leaf_candidate.otherwise_block;
-            });
-            let remaining_match_pairs = mem::take(&mut candidate.match_pairs);
-            candidate.visit_leaves(|leaf_candidate| {
-                assert!(leaf_candidate.match_pairs.is_empty());
-                leaf_candidate.match_pairs.extend(remaining_match_pairs.iter().cloned());
-                let or_start = leaf_candidate.pre_binding_block.unwrap();
-                let otherwise =
-                    self.match_candidates(span, scrutinee_span, or_start, &mut [leaf_candidate]);
-                // In a case like `(P | Q, R | S)`, if `P` succeeds and `R | S` fails, we know `(Q,
-                // R | S)` will fail too. If there is no guard, we skip testing of `Q` by branching
-                // directly to `last_otherwise`. If there is a guard,
-                // `leaf_candidate.otherwise_block` can be reached by guard failure as well, so we
-                // can't skip `Q`.
-                let or_otherwise = if leaf_candidate.has_guard {
-                    leaf_candidate.otherwise_block.unwrap()
-                } else {
-                    last_otherwise.unwrap()
-                };
-                self.cfg.goto(otherwise, source_info, or_otherwise);
-            });
+        let mut last_otherwise = None;
+        let shared_pre_binding_block = self.cfg.start_new_block();
+        // This candidate is about to become a leaf, so unset `or_span`.
+        let or_span = candidate.or_span.take().unwrap();
+        let source_info = self.source_info(or_span);
+
+        if candidate.false_edge_start_block.is_none() {
+            candidate.false_edge_start_block = candidate.subcandidates[0].false_edge_start_block;
+        }
+
+        // Remove the (known-trivial) subcandidates from the candidate tree,
+        // so that they aren't visible after match tree lowering, and wire them
+        // all to join up at a single shared pre-binding block.
+        // (Note that the subcandidates have already had their part of the match
+        // tree lowered by this point, which is why we can add a goto to them.)
+        for subcandidate in mem::take(&mut candidate.subcandidates) {
+            let subcandidate_block = subcandidate.pre_binding_block.unwrap();
+            self.cfg.goto(subcandidate_block, source_info, shared_pre_binding_block);
+            last_otherwise = subcandidate.otherwise_block;
         }
+        candidate.pre_binding_block = Some(shared_pre_binding_block);
+        assert!(last_otherwise.is_some());
+        candidate.otherwise_block = last_otherwise;
     }
 
-    /// Try to merge all of the subcandidates of the given candidate into one. This avoids
-    /// exponentially large CFGs in cases like `(1 | 2, 3 | 4, ...)`. The candidate should have been
-    /// expanded with `create_or_subcandidates`.
-    fn merge_trivial_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) {
-        if candidate.subcandidates.is_empty() || candidate.has_guard {
-            // FIXME(or_patterns; matthewjasper) Don't give up if we have a guard.
+    /// Never subcandidates may have a set of bindings inconsistent with their siblings,
+    /// which would break later code. So we filter them out. Note that we can't filter out
+    /// top-level candidates this way.
+    fn remove_never_subcandidates(&mut self, candidate: &mut Candidate<'_, 'tcx>) {
+        if candidate.subcandidates.is_empty() {
             return;
         }
 
-        // FIXME(or_patterns; matthewjasper) Try to be more aggressive here.
-        let can_merge = candidate.subcandidates.iter().all(|subcandidate| {
-            subcandidate.subcandidates.is_empty() && subcandidate.extra_data.is_empty()
-        });
-        if can_merge {
-            let mut last_otherwise = None;
-            let any_matches = self.cfg.start_new_block();
-            let or_span = candidate.or_span.take().unwrap();
-            let source_info = self.source_info(or_span);
-            if candidate.false_edge_start_block.is_none() {
-                candidate.false_edge_start_block =
-                    candidate.subcandidates[0].false_edge_start_block;
-            }
-            for subcandidate in mem::take(&mut candidate.subcandidates) {
-                let or_block = subcandidate.pre_binding_block.unwrap();
-                self.cfg.goto(or_block, source_info, any_matches);
-                last_otherwise = subcandidate.otherwise_block;
-            }
-            candidate.pre_binding_block = Some(any_matches);
-            assert!(last_otherwise.is_some());
-            candidate.otherwise_block = last_otherwise;
-        } else {
-            // Never subcandidates may have a set of bindings inconsistent with their siblings,
-            // which would break later code. So we filter them out. Note that we can't filter out
-            // top-level candidates this way.
-            candidate.subcandidates.retain_mut(|candidate| {
-                if candidate.extra_data.is_never {
-                    candidate.visit_leaves(|subcandidate| {
-                        let block = subcandidate.pre_binding_block.unwrap();
-                        // That block is already unreachable but needs a terminator to make the MIR well-formed.
-                        let source_info = self.source_info(subcandidate.extra_data.span);
-                        self.cfg.terminate(block, source_info, TerminatorKind::Unreachable);
-                    });
-                    false
-                } else {
-                    true
-                }
-            });
-            if candidate.subcandidates.is_empty() {
-                // If `candidate` has become a leaf candidate, ensure it has a `pre_binding_block`.
-                candidate.pre_binding_block = Some(self.cfg.start_new_block());
+        candidate.subcandidates.retain_mut(|candidate| {
+            if candidate.extra_data.is_never {
+                candidate.visit_leaves(|subcandidate| {
+                    let block = subcandidate.pre_binding_block.unwrap();
+                    // That block is already unreachable but needs a terminator to make the MIR well-formed.
+                    let source_info = self.source_info(subcandidate.extra_data.span);
+                    self.cfg.terminate(block, source_info, TerminatorKind::Unreachable);
+                });
+                false
+            } else {
+                true
             }
+        });
+        if candidate.subcandidates.is_empty() {
+            // If `candidate` has become a leaf candidate, ensure it has a `pre_binding_block`.
+            candidate.pre_binding_block = Some(self.cfg.start_new_block());
+        }
+    }
+
+    /// If more match pairs remain, test them after each subcandidate.
+    /// We could have added them to the or-candidates during or-pattern expansion, but that
+    /// would make it impossible to detect simplifiable or-patterns. That would guarantee
+    /// exponentially large CFGs for cases like `(1 | 2, 3 | 4, ...)`.
+    fn test_remaining_match_pairs_after_or(
+        &mut self,
+        span: Span,
+        scrutinee_span: Span,
+        candidate: &mut Candidate<'_, 'tcx>,
+    ) {
+        if candidate.match_pairs.is_empty() {
+            return;
         }
+
+        let or_span = candidate.or_span.unwrap_or(candidate.extra_data.span);
+        let source_info = self.source_info(or_span);
+        let mut last_otherwise = None;
+        candidate.visit_leaves(|leaf_candidate| {
+            last_otherwise = leaf_candidate.otherwise_block;
+        });
+
+        let remaining_match_pairs = mem::take(&mut candidate.match_pairs);
+        // We're testing match pairs that remained after an `Or`, so the remaining
+        // pairs should all be `Or` too, due to the sorting invariant.
+        debug_assert!(
+            remaining_match_pairs
+                .iter()
+                .all(|match_pair| matches!(match_pair.test_case, TestCase::Or { .. }))
+        );
+
+        candidate.visit_leaves(|leaf_candidate| {
+            // At this point the leaf's own match pairs have all been lowered
+            // and removed, so `extend` and assignment are equivalent,
+            // but extending can also recycle any existing vector capacity.
+            assert!(leaf_candidate.match_pairs.is_empty());
+            leaf_candidate.match_pairs.extend(remaining_match_pairs.iter().cloned());
+
+            let or_start = leaf_candidate.pre_binding_block.unwrap();
+            let otherwise =
+                self.match_candidates(span, scrutinee_span, or_start, &mut [leaf_candidate]);
+            // In a case like `(P | Q, R | S)`, if `P` succeeds and `R | S` fails, we know `(Q,
+            // R | S)` will fail too. If there is no guard, we skip testing of `Q` by branching
+            // directly to `last_otherwise`. If there is a guard,
+            // `leaf_candidate.otherwise_block` can be reached by guard failure as well, so we
+            // can't skip `Q`.
+            let or_otherwise = if leaf_candidate.has_guard {
+                leaf_candidate.otherwise_block.unwrap()
+            } else {
+                last_otherwise.unwrap()
+            };
+            self.cfg.goto(otherwise, source_info, or_otherwise);
+        });
     }
 
     /// Pick a test to run. Which test doesn't matter as long as it is guaranteed to fully match at
diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs
index bfd505c06726d..3655a677ba0ad 100644
--- a/compiler/rustc_monomorphize/src/collector.rs
+++ b/compiler/rustc_monomorphize/src/collector.rs
@@ -228,6 +228,7 @@ use rustc_middle::ty::{
     self, AssocKind, GenericParamDefKind, Instance, InstanceKind, Ty, TyCtxt, TypeFoldable,
     TypeVisitableExt, VtblEntry,
 };
+use rustc_middle::util::Providers;
 use rustc_middle::{bug, span_bug};
 use rustc_session::config::EntryFnType;
 use rustc_session::Limit;
@@ -399,7 +400,7 @@ fn collect_items_rec<'tcx>(
                 let instance = Instance::mono(tcx, def_id);
 
                 // Sanity check whether this ended up being collected accidentally
-                debug_assert!(should_codegen_locally(tcx, instance));
+                debug_assert!(tcx.should_codegen_locally(instance));
 
                 let DefKind::Static { nested, .. } = tcx.def_kind(def_id) else { bug!() };
                 // Nested statics have no type.
@@ -431,7 +432,7 @@ fn collect_items_rec<'tcx>(
         }
         MonoItem::Fn(instance) => {
             // Sanity check whether this ended up being collected accidentally
-            debug_assert!(should_codegen_locally(tcx, instance));
+            debug_assert!(tcx.should_codegen_locally(instance));
 
             // Keep track of the monomorphization recursion depth
             recursion_depth_reset = Some(check_recursion_limit(
@@ -475,7 +476,7 @@ fn collect_items_rec<'tcx>(
                         }
                         hir::InlineAsmOperand::SymStatic { path: _, def_id } => {
                             let instance = Instance::mono(tcx, *def_id);
-                            if should_codegen_locally(tcx, instance) {
+                            if tcx.should_codegen_locally(instance) {
                                 trace!("collecting static {:?}", def_id);
                                 used_items.push(dummy_spanned(MonoItem::Static(*def_id)));
                             }
@@ -712,7 +713,7 @@ impl<'a, 'tcx> MirVisitor<'tcx> for MirUsedCollector<'a, 'tcx> {
                 if let ty::Closure(def_id, args) = *source_ty.kind() {
                     let instance =
                         Instance::resolve_closure(self.tcx, def_id, args, ty::ClosureKind::FnOnce);
-                    if should_codegen_locally(self.tcx, instance) {
+                    if self.tcx.should_codegen_locally(instance) {
                         self.used_items.push(create_fn_mono_item(self.tcx, instance, span));
                     }
                 } else {
@@ -722,7 +723,7 @@ impl<'a, 'tcx> MirVisitor<'tcx> for MirUsedCollector<'a, 'tcx> {
             mir::Rvalue::ThreadLocalRef(def_id) => {
                 assert!(self.tcx.is_thread_local_static(def_id));
                 let instance = Instance::mono(self.tcx, def_id);
-                if should_codegen_locally(self.tcx, instance) {
+                if self.tcx.should_codegen_locally(instance) {
                     trace!("collecting thread-local static {:?}", def_id);
                     self.used_items.push(respan(span, MonoItem::Static(def_id)));
                 }
@@ -749,7 +750,7 @@ impl<'a, 'tcx> MirVisitor<'tcx> for MirUsedCollector<'a, 'tcx> {
         let tcx = self.tcx;
         let push_mono_lang_item = |this: &mut Self, lang_item: LangItem| {
             let instance = Instance::mono(tcx, tcx.require_lang_item(lang_item, Some(source)));
-            if should_codegen_locally(tcx, instance) {
+            if tcx.should_codegen_locally(instance) {
                 this.used_items.push(create_fn_mono_item(tcx, instance, source));
             }
         };
@@ -783,7 +784,7 @@ impl<'a, 'tcx> MirVisitor<'tcx> for MirUsedCollector<'a, 'tcx> {
                         }
                         mir::InlineAsmOperand::SymStatic { def_id } => {
                             let instance = Instance::mono(self.tcx, def_id);
-                            if should_codegen_locally(self.tcx, instance) {
+                            if self.tcx.should_codegen_locally(instance) {
                                 trace!("collecting asm sym static {:?}", def_id);
                                 self.used_items.push(respan(source, MonoItem::Static(def_id)));
                             }
@@ -873,7 +874,7 @@ fn visit_instance_use<'tcx>(
     output: &mut MonoItems<'tcx>,
 ) {
     debug!("visit_item_use({:?}, is_direct_call={:?})", instance, is_direct_call);
-    if !should_codegen_locally(tcx, instance) {
+    if !tcx.should_codegen_locally(instance) {
         return;
     }
     if let ty::InstanceKind::Intrinsic(def_id) = instance.def {
@@ -885,13 +886,13 @@ fn visit_instance_use<'tcx>(
             // codegen a call to that function without generating code for the function itself.
             let def_id = tcx.require_lang_item(LangItem::PanicNounwind, None);
             let panic_instance = Instance::mono(tcx, def_id);
-            if should_codegen_locally(tcx, panic_instance) {
+            if tcx.should_codegen_locally(panic_instance) {
                 output.push(create_fn_mono_item(tcx, panic_instance, source));
             }
         } else if tcx.has_attr(def_id, sym::rustc_intrinsic) {
             // Codegen the fallback body of intrinsics with fallback bodies
             let instance = ty::Instance::new(def_id, instance.args);
-            if should_codegen_locally(tcx, instance) {
+            if tcx.should_codegen_locally(instance) {
                 output.push(create_fn_mono_item(tcx, instance, source));
             }
         }
@@ -930,7 +931,7 @@ fn visit_instance_use<'tcx>(
 
 /// Returns `true` if we should codegen an instance in the local crate, or returns `false` if we
 /// can just link to the upstream crate and therefore don't need a mono item.
-pub(crate) fn should_codegen_locally<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> bool {
+fn should_codegen_locally<'tcx>(tcx: TyCtxtAt<'tcx>, instance: Instance<'tcx>) -> bool {
     let Some(def_id) = instance.def.def_id_if_not_guaranteed_local_codegen() else {
         return true;
     };
@@ -946,7 +947,7 @@ pub(crate) fn should_codegen_locally<'tcx>(tcx: TyCtxt<'tcx>, instance: Instance
     }
 
     if tcx.is_reachable_non_generic(def_id)
-        || instance.polymorphize(tcx).upstream_monomorphization(tcx).is_some()
+        || instance.polymorphize(*tcx).upstream_monomorphization(*tcx).is_some()
     {
         // We can link to the item in question, no instance needed in this crate.
         return false;
@@ -1127,7 +1128,7 @@ fn create_mono_items_for_vtable_methods<'tcx>(
                     None
                 }
                 VtblEntry::Method(instance) => {
-                    Some(*instance).filter(|instance| should_codegen_locally(tcx, *instance))
+                    Some(*instance).filter(|instance| tcx.should_codegen_locally(*instance))
                 }
             })
             .map(|item| create_fn_mono_item(tcx, item, source));
@@ -1144,7 +1145,7 @@ fn collect_alloc<'tcx>(tcx: TyCtxt<'tcx>, alloc_id: AllocId, output: &mut MonoIt
         GlobalAlloc::Static(def_id) => {
             assert!(!tcx.is_thread_local_static(def_id));
             let instance = Instance::mono(tcx, def_id);
-            if should_codegen_locally(tcx, instance) {
+            if tcx.should_codegen_locally(instance) {
                 trace!("collecting static {:?}", def_id);
                 output.push(dummy_spanned(MonoItem::Static(def_id)));
             }
@@ -1162,7 +1163,7 @@ fn collect_alloc<'tcx>(tcx: TyCtxt<'tcx>, alloc_id: AllocId, output: &mut MonoIt
             }
         }
         GlobalAlloc::Function { instance, .. } => {
-            if should_codegen_locally(tcx, instance) {
+            if tcx.should_codegen_locally(instance) {
                 trace!("collecting {:?} with {:#?}", alloc_id, instance);
                 output.push(create_fn_mono_item(tcx, instance, DUMMY_SP));
             }
@@ -1284,7 +1285,7 @@ fn visit_mentioned_item<'tcx>(
             if let ty::Closure(def_id, args) = *source_ty.kind() {
                 let instance =
                     Instance::resolve_closure(tcx, def_id, args, ty::ClosureKind::FnOnce);
-                if should_codegen_locally(tcx, instance) {
+                if tcx.should_codegen_locally(instance) {
                     output.push(create_fn_mono_item(tcx, instance, span));
                 }
             } else {
@@ -1557,7 +1558,7 @@ fn create_mono_items_for_default_impls<'tcx>(
         let instance = ty::Instance::expect_resolve(tcx, param_env, method.def_id, args, DUMMY_SP);
 
         let mono_item = create_fn_mono_item(tcx, instance, DUMMY_SP);
-        if mono_item.node.is_instantiable(tcx) && should_codegen_locally(tcx, instance) {
+        if mono_item.node.is_instantiable(tcx) && tcx.should_codegen_locally(instance) {
             output.push(mono_item);
         }
     }
@@ -1613,3 +1614,7 @@ pub(crate) fn collect_crate_mono_items<'tcx>(
 
     (mono_items, state.usage_map.into_inner())
 }
+
+pub fn provide(providers: &mut Providers) {
+    providers.hooks.should_codegen_locally = should_codegen_locally;
+}
diff --git a/compiler/rustc_monomorphize/src/lib.rs b/compiler/rustc_monomorphize/src/lib.rs
index aa3b4cd5b6782..fc6e8e0d14fd2 100644
--- a/compiler/rustc_monomorphize/src/lib.rs
+++ b/compiler/rustc_monomorphize/src/lib.rs
@@ -5,14 +5,11 @@
 
 use rustc_hir::lang_items::LangItem;
 use rustc_middle::bug;
-use rustc_middle::query::{Providers, TyCtxtAt};
+use rustc_middle::query::TyCtxtAt;
 use rustc_middle::traits;
 use rustc_middle::ty::adjustment::CustomCoerceUnsized;
-use rustc_middle::ty::Instance;
-use rustc_middle::ty::TyCtxt;
 use rustc_middle::ty::{self, Ty};
-use rustc_span::def_id::DefId;
-use rustc_span::def_id::LOCAL_CRATE;
+use rustc_middle::util::Providers;
 use rustc_span::ErrorGuaranteed;
 
 mod collector;
@@ -21,8 +18,6 @@ mod partitioning;
 mod polymorphize;
 mod util;
 
-use collector::should_codegen_locally;
-
 rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
 
 fn custom_coerce_unsize_info<'tcx>(
@@ -47,34 +42,6 @@ fn custom_coerce_unsize_info<'tcx>(
     }
 }
 
-/// Returns whether a call from the current crate to the [`Instance`] would produce a call
-/// from `compiler_builtins` to a symbol the linker must resolve.
-///
-/// Such calls from `compiler_bultins` are effectively impossible for the linker to handle. Some
-/// linkers will optimize such that dead calls to unresolved symbols are not an error, but this is
-/// not guaranteed. So we used this function in codegen backends to ensure we do not generate any
-/// unlinkable calls.
-///
-/// Note that calls to LLVM intrinsics are uniquely okay because they won't make it to the linker.
-pub fn is_call_from_compiler_builtins_to_upstream_monomorphization<'tcx>(
-    tcx: TyCtxt<'tcx>,
-    instance: Instance<'tcx>,
-) -> bool {
-    fn is_llvm_intrinsic(tcx: TyCtxt<'_>, def_id: DefId) -> bool {
-        if let Some(name) = tcx.codegen_fn_attrs(def_id).link_name {
-            name.as_str().starts_with("llvm.")
-        } else {
-            false
-        }
-    }
-
-    let def_id = instance.def_id();
-    !def_id.is_local()
-        && tcx.is_compiler_builtins(LOCAL_CRATE)
-        && !is_llvm_intrinsic(tcx, def_id)
-        && !should_codegen_locally(tcx, instance)
-}
-
 pub fn provide(providers: &mut Providers) {
     partitioning::provide(providers);
     polymorphize::provide(providers);
diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs
index 9a7c488833a1a..8c7c5e0074abf 100644
--- a/compiler/rustc_monomorphize/src/partitioning.rs
+++ b/compiler/rustc_monomorphize/src/partitioning.rs
@@ -112,9 +112,9 @@ use rustc_middle::mir::mono::{
     CodegenUnit, CodegenUnitNameBuilder, InstantiationMode, Linkage, MonoItem, MonoItemData,
     Visibility,
 };
-use rustc_middle::query::Providers;
 use rustc_middle::ty::print::{characteristic_def_id_of_type, with_no_trimmed_paths};
 use rustc_middle::ty::{self, visit::TypeVisitableExt, InstanceKind, TyCtxt};
+use rustc_middle::util::Providers;
 use rustc_session::config::{DumpMonoStatsFormat, SwitchWithOptPath};
 use rustc_session::CodegenUnits;
 use rustc_span::symbol::Symbol;
@@ -1314,4 +1314,6 @@ pub fn provide(providers: &mut Providers) {
             .find(|cgu| cgu.name() == name)
             .unwrap_or_else(|| panic!("failed to find cgu with name {name:?}"))
     };
+
+    collector::provide(providers);
 }
diff --git a/library/std/src/os/solid/io.rs b/library/std/src/os/solid/io.rs
index 19b4fe22093c3..e75bcf74e5cd3 100644
--- a/library/std/src/os/solid/io.rs
+++ b/library/std/src/os/solid/io.rs
@@ -44,7 +44,6 @@
 //!
 //! [`BorrowedFd<'a>`]: crate::os::solid::io::BorrowedFd
 
-#![deny(unsafe_op_in_unsafe_fn)]
 #![unstable(feature = "solid_ext", issue = "none")]
 
 use crate::fmt;
diff --git a/library/std/src/os/solid/mod.rs b/library/std/src/os/solid/mod.rs
index 0bb83c73ddf7c..75824048e2498 100644
--- a/library/std/src/os/solid/mod.rs
+++ b/library/std/src/os/solid/mod.rs
@@ -1,4 +1,5 @@
 #![stable(feature = "rust1", since = "1.0.0")]
+#![forbid(unsafe_op_in_unsafe_fn)]
 
 pub mod ffi;
 pub mod io;
diff --git a/library/std/src/sys/pal/solid/mod.rs b/library/std/src/sys/pal/solid/mod.rs
index 9a7741ddda71e..0b158fb63df70 100644
--- a/library/std/src/sys/pal/solid/mod.rs
+++ b/library/std/src/sys/pal/solid/mod.rs
@@ -1,6 +1,6 @@
 #![allow(dead_code)]
 #![allow(missing_docs, nonstandard_style)]
-#![deny(unsafe_op_in_unsafe_fn)]
+#![forbid(unsafe_op_in_unsafe_fn)]
 
 pub mod abi;
 
diff --git a/library/std/src/sys/pal/unix/stack_overflow.rs b/library/std/src/sys/pal/unix/stack_overflow.rs
index 0db08c1a926f5..6eeec48bf5eaf 100644
--- a/library/std/src/sys/pal/unix/stack_overflow.rs
+++ b/library/std/src/sys/pal/unix/stack_overflow.rs
@@ -87,13 +87,18 @@ mod imp {
     // out many large systems and all implementations allow returning from a
     // signal handler to work. For a more detailed explanation see the
     // comments on #26458.
+    /// SIGSEGV/SIGBUS entry point
+    /// # Safety
+    /// Rust doesn't call this, it *gets called*.
+    #[forbid(unsafe_op_in_unsafe_fn)]
     unsafe extern "C" fn signal_handler(
         signum: libc::c_int,
         info: *mut libc::siginfo_t,
         _data: *mut libc::c_void,
     ) {
         let (start, end) = GUARD.get();
-        let addr = (*info).si_addr() as usize;
+        // SAFETY: this pointer is provided by the system and will always point to a valid `siginfo_t`.
+        let addr = unsafe { (*info).si_addr().addr() };
 
         // If the faulting address is within the guard page, then we print a
         // message saying so and abort.
@@ -105,9 +110,11 @@ mod imp {
             rtabort!("stack overflow");
         } else {
             // Unregister ourselves by reverting back to the default behavior.
-            let mut action: sigaction = mem::zeroed();
+            // SAFETY: assuming all platforms define struct sigaction as "zero-initializable"
+            let mut action: sigaction = unsafe { mem::zeroed() };
             action.sa_sigaction = SIG_DFL;
-            sigaction(signum, &action, ptr::null_mut());
+            // SAFETY: pray this is a well-behaved POSIX implementation of fn sigaction
+            unsafe { sigaction(signum, &action, ptr::null_mut()) };
 
             // See comment above for why this function returns.
         }
@@ -117,32 +124,45 @@ mod imp {
     static MAIN_ALTSTACK: AtomicPtr<libc::c_void> = AtomicPtr::new(ptr::null_mut());
     static NEED_ALTSTACK: AtomicBool = AtomicBool::new(false);
 
+    /// # Safety
+    /// Must be called only once
+    #[forbid(unsafe_op_in_unsafe_fn)]
     pub unsafe fn init() {
         PAGE_SIZE.store(os::page_size(), Ordering::Relaxed);
 
         // Always write to GUARD to ensure the TLS variable is allocated.
-        let guard = install_main_guard().unwrap_or(0..0);
+        let guard = unsafe { install_main_guard().unwrap_or(0..0) };
         GUARD.set((guard.start, guard.end));
 
-        let mut action: sigaction = mem::zeroed();
+        // SAFETY: assuming all platforms define struct sigaction as "zero-initializable"
+        let mut action: sigaction = unsafe { mem::zeroed() };
         for &signal in &[SIGSEGV, SIGBUS] {
-            sigaction(signal, ptr::null_mut(), &mut action);
+            // SAFETY: just fetches the current signal handler into action
+            unsafe { sigaction(signal, ptr::null_mut(), &mut action) };
             // Configure our signal handler if one is not already set.
             if action.sa_sigaction == SIG_DFL {
+                if !NEED_ALTSTACK.load(Ordering::Relaxed) {
+                    // haven't set up our sigaltstack yet
+                    NEED_ALTSTACK.store(true, Ordering::Release);
+                    let handler = unsafe { make_handler(true) };
+                    MAIN_ALTSTACK.store(handler.data, Ordering::Relaxed);
+                    mem::forget(handler);
+                }
                 action.sa_flags = SA_SIGINFO | SA_ONSTACK;
                 action.sa_sigaction = signal_handler as sighandler_t;
-                sigaction(signal, &action, ptr::null_mut());
-                NEED_ALTSTACK.store(true, Ordering::Relaxed);
+                // SAFETY: only overriding signals if the default is set
+                unsafe { sigaction(signal, &action, ptr::null_mut()) };
             }
         }
-
-        let handler = make_handler(true);
-        MAIN_ALTSTACK.store(handler.data, Ordering::Relaxed);
-        mem::forget(handler);
     }
 
+    /// # Safety
+    /// Must be called only once
+    #[forbid(unsafe_op_in_unsafe_fn)]
     pub unsafe fn cleanup() {
-        drop_handler(MAIN_ALTSTACK.load(Ordering::Relaxed));
+        // FIXME: I probably cause more bugs than I'm worth!
+        // see https://github.com/rust-lang/rust/issues/111272
+        unsafe { drop_handler(MAIN_ALTSTACK.load(Ordering::Relaxed)) };
     }
 
     unsafe fn get_stack() -> libc::stack_t {
@@ -187,34 +207,48 @@ mod imp {
         libc::stack_t { ss_sp: stackp, ss_flags: 0, ss_size: sigstack_size }
     }
 
+    /// # Safety
+    /// Mutates the alternate signal stack
+    #[forbid(unsafe_op_in_unsafe_fn)]
     pub unsafe fn make_handler(main_thread: bool) -> Handler {
-        if !NEED_ALTSTACK.load(Ordering::Relaxed) {
+        if !NEED_ALTSTACK.load(Ordering::Acquire) {
             return Handler::null();
         }
 
         if !main_thread {
             // Always write to GUARD to ensure the TLS variable is allocated.
-            let guard = current_guard().unwrap_or(0..0);
+            let guard = unsafe { current_guard() }.unwrap_or(0..0);
             GUARD.set((guard.start, guard.end));
         }
 
-        let mut stack = mem::zeroed();
-        sigaltstack(ptr::null(), &mut stack);
+        // SAFETY: assuming stack_t is zero-initializable
+        let mut stack = unsafe { mem::zeroed() };
+        // SAFETY: reads current stack_t into stack
+        unsafe { sigaltstack(ptr::null(), &mut stack) };
         // Configure alternate signal stack, if one is not already set.
         if stack.ss_flags & SS_DISABLE != 0 {
-            stack = get_stack();
-            sigaltstack(&stack, ptr::null_mut());
+            // SAFETY: We warned our caller this would happen!
+            unsafe {
+                stack = get_stack();
+                sigaltstack(&stack, ptr::null_mut());
+            }
             Handler { data: stack.ss_sp as *mut libc::c_void }
         } else {
             Handler::null()
         }
     }
 
+    /// # Safety
+    /// Must be called
+    /// - only with our handler or nullptr
+    /// - only when done with our altstack
+    /// This disables the alternate signal stack!
+    #[forbid(unsafe_op_in_unsafe_fn)]
     pub unsafe fn drop_handler(data: *mut libc::c_void) {
         if !data.is_null() {
             let sigstack_size = sigstack_size();
             let page_size = PAGE_SIZE.load(Ordering::Relaxed);
-            let stack = libc::stack_t {
+            let disabling_stack = libc::stack_t {
                 ss_sp: ptr::null_mut(),
                 ss_flags: SS_DISABLE,
                 // Workaround for bug in macOS implementation of sigaltstack
@@ -223,10 +257,11 @@ mod imp {
                 // both ss_sp and ss_size should be ignored in this case.
                 ss_size: sigstack_size,
             };
-            sigaltstack(&stack, ptr::null_mut());
-            // We know from `get_stackp` that the alternate stack we installed is part of a mapping
-            // that started one page earlier, so walk back a page and unmap from there.
-            munmap(data.sub(page_size), sigstack_size + page_size);
+            // SAFETY: we warned the caller this disables the alternate signal stack!
+            unsafe { sigaltstack(&disabling_stack, ptr::null_mut()) };
+            // SAFETY: We know from `get_stackp` that the alternate stack we installed is part of
+            // a mapping that started one page earlier, so walk back a page and unmap from there.
+            unsafe { munmap(data.sub(page_size), sigstack_size + page_size) };
         }
     }
 
@@ -455,6 +490,7 @@ mod imp {
     }
 
     #[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "solaris"))]
+    // FIXME: I am probably not unsafe.
     unsafe fn current_guard() -> Option<Range<usize>> {
         let stackptr = get_stack_start()?;
         let stackaddr = stackptr.addr();
@@ -469,6 +505,7 @@ mod imp {
         target_os = "netbsd",
         target_os = "l4re"
     ))]
+    // FIXME: I am probably not unsafe.
     unsafe fn current_guard() -> Option<Range<usize>> {
         let mut ret = None;
         let mut attr: libc::pthread_attr_t = crate::mem::zeroed();
diff --git a/library/std/src/sys/path/unsupported_backslash.rs b/library/std/src/sys/path/unsupported_backslash.rs
index 7045c9be25b08..855f443678c6c 100644
--- a/library/std/src/sys/path/unsupported_backslash.rs
+++ b/library/std/src/sys/path/unsupported_backslash.rs
@@ -1,3 +1,4 @@
+#![forbid(unsafe_op_in_unsafe_fn)]
 use crate::ffi::OsStr;
 use crate::io;
 use crate::path::{Path, PathBuf, Prefix};
diff --git a/library/std/src/sys/sync/mutex/itron.rs b/library/std/src/sys/sync/mutex/itron.rs
index 4ba32a8fbcd69..b29c7e1d03444 100644
--- a/library/std/src/sys/sync/mutex/itron.rs
+++ b/library/std/src/sys/sync/mutex/itron.rs
@@ -1,5 +1,6 @@
 //! Mutex implementation backed by μITRON mutexes. Assumes `acre_mtx` and
 //! `TA_INHERIT` are available.
+#![forbid(unsafe_op_in_unsafe_fn)]
 
 use crate::sys::pal::itron::{
     abi,
diff --git a/library/std/src/sys/sync/rwlock/solid.rs b/library/std/src/sys/sync/rwlock/solid.rs
index 7558eee8edd33..a8fef685ceb27 100644
--- a/library/std/src/sys/sync/rwlock/solid.rs
+++ b/library/std/src/sys/sync/rwlock/solid.rs
@@ -1,4 +1,5 @@
 //! A readers-writer lock implementation backed by the SOLID kernel extension.
+#![forbid(unsafe_op_in_unsafe_fn)]
 
 use crate::sys::pal::{
     abi,
diff --git a/src/bootstrap/src/core/build_steps/llvm.rs b/src/bootstrap/src/core/build_steps/llvm.rs
index 4479030184118..f234b08f5e2ca 100644
--- a/src/bootstrap/src/core/build_steps/llvm.rs
+++ b/src/bootstrap/src/core/build_steps/llvm.rs
@@ -26,7 +26,6 @@ use crate::{generate_smart_stamp_hash, CLang, GitRepo, Kind};
 
 use crate::utils::exec::command;
 use build_helper::ci::CiEnv;
-use build_helper::git::get_git_merge_base;
 
 #[derive(Clone)]
 pub struct LlvmResult {
@@ -154,26 +153,18 @@ pub fn prebuilt_llvm_config(builder: &Builder<'_>, target: TargetSelection) -> L
 /// This retrieves the LLVM sha we *want* to use, according to git history.
 pub(crate) fn detect_llvm_sha(config: &Config, is_git: bool) -> String {
     let llvm_sha = if is_git {
-        // We proceed in 2 steps. First we get the closest commit that is actually upstream. Then we
-        // walk back further to the last bors merge commit that actually changed LLVM. The first
-        // step will fail on CI because only the `auto` branch exists; we just fall back to `HEAD`
-        // in that case.
-        let closest_upstream = get_git_merge_base(&config.git_config(), Some(&config.src))
-            .unwrap_or_else(|_| "HEAD".into());
-        let mut rev_list = helpers::git(Some(&config.src));
-        rev_list.args(&[
-            PathBuf::from("rev-list"),
-            format!("--author={}", config.stage0_metadata.config.git_merge_commit_email).into(),
-            "-n1".into(),
-            "--first-parent".into(),
-            closest_upstream.into(),
-            "--".into(),
-            config.src.join("src/llvm-project"),
-            config.src.join("src/bootstrap/download-ci-llvm-stamp"),
-            // the LLVM shared object file is named `LLVM-12-rust-{version}-nightly`
-            config.src.join("src/version"),
-        ]);
-        output(rev_list.as_command_mut()).trim().to_owned()
+        helpers::get_closest_merge_base_commit(
+            Some(&config.src),
+            &config.git_config(),
+            &config.stage0_metadata.config.git_merge_commit_email,
+            &[
+                config.src.join("src/llvm-project"),
+                config.src.join("src/bootstrap/download-ci-llvm-stamp"),
+                // the LLVM shared object file is named `LLVM-12-rust-{version}-nightly`
+                config.src.join("src/version"),
+            ],
+        )
+        .unwrap()
     } else if let Some(info) = channel::read_commit_info_file(&config.src) {
         info.sha.trim().to_owned()
     } else {
diff --git a/src/bootstrap/src/core/build_steps/tool.rs b/src/bootstrap/src/core/build_steps/tool.rs
index 2f8b41334fc7c..087df2f8a88e2 100644
--- a/src/bootstrap/src/core/build_steps/tool.rs
+++ b/src/bootstrap/src/core/build_steps/tool.rs
@@ -9,7 +9,7 @@ use crate::core::builder::{Builder, Cargo as CargoCommand, RunConfig, ShouldRun,
 use crate::core::config::TargetSelection;
 use crate::utils::channel::GitInfo;
 use crate::utils::exec::{command, BootstrapCommand};
-use crate::utils::helpers::{add_dylib_path, exe, t};
+use crate::utils::helpers::{add_dylib_path, exe, get_closest_merge_base_commit, git, t};
 use crate::Compiler;
 use crate::Mode;
 use crate::{gha, Kind};
@@ -554,6 +554,57 @@ impl Step for Rustdoc {
         }
         let target = target_compiler.host;
 
+        let bin_rustdoc = || {
+            let sysroot = builder.sysroot(target_compiler);
+            let bindir = sysroot.join("bin");
+            t!(fs::create_dir_all(&bindir));
+            let bin_rustdoc = bindir.join(exe("rustdoc", target_compiler.host));
+            let _ = fs::remove_file(&bin_rustdoc);
+            bin_rustdoc
+        };
+
+        // If CI rustc is enabled and we haven't modified the rustdoc sources,
+        // use the precompiled rustdoc from CI rustc's sysroot to speed up bootstrapping.
+        if builder.download_rustc()
+            && target_compiler.stage > 0
+            && builder.rust_info().is_managed_git_subrepository()
+        {
+            let commit = get_closest_merge_base_commit(
+                Some(&builder.config.src),
+                &builder.config.git_config(),
+                &builder.config.stage0_metadata.config.git_merge_commit_email,
+                &[],
+            )
+            .unwrap();
+
+            let librustdoc_src = builder.config.src.join("src/librustdoc");
+            let rustdoc_src = builder.config.src.join("src/tools/rustdoc");
+
+            // FIXME: The change detection logic here is quite similar to `Config::download_ci_rustc_commit`.
+            // It would be better to unify them.
+            let has_changes = !git(Some(&builder.config.src))
+                .allow_failure()
+                .run_always()
+                .args(["diff-index", "--quiet", &commit])
+                .arg("--")
+                .arg(librustdoc_src)
+                .arg(rustdoc_src)
+                .run(builder)
+                .is_success();
+
+            if !has_changes {
+                let precompiled_rustdoc = builder
+                    .config
+                    .ci_rustc_dir()
+                    .join("bin")
+                    .join(exe("rustdoc", target_compiler.host));
+
+                let bin_rustdoc = bin_rustdoc();
+                builder.copy_link(&precompiled_rustdoc, &bin_rustdoc);
+                return bin_rustdoc;
+            }
+        }
+
         let build_compiler = if builder.download_rustc() && target_compiler.stage == 1 {
             // We already have the stage 1 compiler, we don't need to cut the stage.
             builder.compiler(target_compiler.stage, builder.config.build)
@@ -614,11 +665,7 @@ impl Step for Rustdoc {
 
         // don't create a stage0-sysroot/bin directory.
         if target_compiler.stage > 0 {
-            let sysroot = builder.sysroot(target_compiler);
-            let bindir = sysroot.join("bin");
-            t!(fs::create_dir_all(&bindir));
-            let bin_rustdoc = bindir.join(exe("rustdoc", target_compiler.host));
-            let _ = fs::remove_file(&bin_rustdoc);
+            let bin_rustdoc = bin_rustdoc();
             builder.copy_link(&tool_rustdoc, &bin_rustdoc);
             bin_rustdoc
         } else {
diff --git a/src/bootstrap/src/core/config/config.rs b/src/bootstrap/src/core/config/config.rs
index f96633b059a17..9d5aa795c6c0d 100644
--- a/src/bootstrap/src/core/config/config.rs
+++ b/src/bootstrap/src/core/config/config.rs
@@ -20,7 +20,7 @@ use crate::core::build_steps::llvm;
 use crate::core::config::flags::{Color, Flags, Warnings};
 use crate::utils::cache::{Interned, INTERNER};
 use crate::utils::channel::{self, GitInfo};
-use crate::utils::helpers::{self, exe, output, t};
+use crate::utils::helpers::{self, exe, get_closest_merge_base_commit, output, t};
 use build_helper::exit;
 use serde::{Deserialize, Deserializer};
 use serde_derive::Deserialize;
@@ -2471,14 +2471,13 @@ impl Config {
 
         // Look for a version to compare to based on the current commit.
         // Only commits merged by bors will have CI artifacts.
-        let merge_base = output(
-            helpers::git(Some(&self.src))
-                .arg("rev-list")
-                .arg(format!("--author={}", self.stage0_metadata.config.git_merge_commit_email))
-                .args(["-n1", "--first-parent", "HEAD"])
-                .as_command_mut(),
-        );
-        let commit = merge_base.trim_end();
+        let commit = get_closest_merge_base_commit(
+            Some(&self.src),
+            &self.git_config(),
+            &self.stage0_metadata.config.git_merge_commit_email,
+            &[],
+        )
+        .unwrap();
         if commit.is_empty() {
             println!("ERROR: could not find commit hash for downloading rustc");
             println!("HELP: maybe your repository history is too shallow?");
@@ -2489,7 +2488,7 @@ impl Config {
 
         // Warn if there were changes to the compiler or standard library since the ancestor commit.
         let has_changes = !t!(helpers::git(Some(&self.src))
-            .args(["diff-index", "--quiet", commit])
+            .args(["diff-index", "--quiet", &commit])
             .arg("--")
             .args([self.src.join("compiler"), self.src.join("library")])
             .as_command_mut()
@@ -2565,14 +2564,13 @@ impl Config {
     ) -> Option<String> {
         // Look for a version to compare to based on the current commit.
         // Only commits merged by bors will have CI artifacts.
-        let merge_base = output(
-            helpers::git(Some(&self.src))
-                .arg("rev-list")
-                .arg(format!("--author={}", self.stage0_metadata.config.git_merge_commit_email))
-                .args(["-n1", "--first-parent", "HEAD"])
-                .as_command_mut(),
-        );
-        let commit = merge_base.trim_end();
+        let commit = get_closest_merge_base_commit(
+            Some(&self.src),
+            &self.git_config(),
+            &self.stage0_metadata.config.git_merge_commit_email,
+            &[],
+        )
+        .unwrap();
         if commit.is_empty() {
             println!("error: could not find commit hash for downloading components from CI");
             println!("help: maybe your repository history is too shallow?");
@@ -2583,7 +2581,7 @@ impl Config {
 
         // Warn if there were changes to the compiler or standard library since the ancestor commit.
         let mut git = helpers::git(Some(&self.src));
-        git.args(["diff-index", "--quiet", commit, "--"]);
+        git.args(["diff-index", "--quiet", &commit, "--"]);
 
         // Handle running from a directory other than the top level
         let top_level = &self.src;
diff --git a/src/bootstrap/src/utils/helpers.rs b/src/bootstrap/src/utils/helpers.rs
index 3c82fa189be36..773a873e47cc0 100644
--- a/src/bootstrap/src/utils/helpers.rs
+++ b/src/bootstrap/src/utils/helpers.rs
@@ -3,6 +3,7 @@
 //! Simple things like testing the various filesystem operations here and there,
 //! not a lot of interesting happenings here unfortunately.
 
+use build_helper::git::{get_git_merge_base, output_result, GitConfig};
 use build_helper::util::fail;
 use std::env;
 use std::ffi::OsStr;
@@ -521,3 +522,26 @@ pub fn git(source_dir: Option<&Path>) -> BootstrapCommand {
 
     git
 }
+
+/// Returns the closest commit available from upstream for the given `author` and `target_paths`.
+///
+/// If it fails to find the commit from upstream using `git merge-base`, fallbacks to HEAD.
+pub fn get_closest_merge_base_commit(
+    source_dir: Option<&Path>,
+    config: &GitConfig<'_>,
+    author: &str,
+    target_paths: &[PathBuf],
+) -> Result<String, String> {
+    let mut git = git(source_dir).capture_stdout();
+
+    let merge_base = get_git_merge_base(config, source_dir).unwrap_or_else(|_| "HEAD".into());
+
+    git.arg(Path::new("rev-list"));
+    git.args([&format!("--author={author}"), "-n1", "--first-parent", &merge_base]);
+
+    if !target_paths.is_empty() {
+        git.arg("--").args(target_paths);
+    }
+
+    Ok(output_result(git.as_command_mut())?.trim().to_owned())
+}
diff --git a/src/tools/build_helper/src/git.rs b/src/tools/build_helper/src/git.rs
index b4522de6897d4..8be38dc855f7a 100644
--- a/src/tools/build_helper/src/git.rs
+++ b/src/tools/build_helper/src/git.rs
@@ -7,7 +7,7 @@ pub struct GitConfig<'a> {
 }
 
 /// Runs a command and returns the output
-fn output_result(cmd: &mut Command) -> Result<String, String> {
+pub fn output_result(cmd: &mut Command) -> Result<String, String> {
     let output = match cmd.stderr(Stdio::inherit()).output() {
         Ok(status) => status,
         Err(e) => return Err(format!("failed to run command: {:?}: {}", cmd, e)),
diff --git a/src/tools/run-make-support/src/lib.rs b/src/tools/run-make-support/src/lib.rs
index e6a45f57de608..b85191970de62 100644
--- a/src/tools/run-make-support/src/lib.rs
+++ b/src/tools/run-make-support/src/lib.rs
@@ -3,6 +3,10 @@
 //! notably is built via cargo: this means that if your test wants some non-trivial utility, such
 //! as `object` or `wasmparser`, they can be re-exported and be made available through this library.
 
+// We want to control use declaration ordering and spacing (and preserve use group comments), so
+// skip rustfmt on this file.
+#![cfg_attr(rustfmt, rustfmt::skip)]
+
 mod command;
 mod macros;
 mod util;
@@ -18,6 +22,8 @@ pub mod scoped_run;
 pub mod string;
 pub mod targets;
 
+// Internally we call our fs-related support module as `fs`, but re-export its content as `rfs`
+// to tests to avoid colliding with commonly used `use std::fs;`.
 mod fs;
 
 /// [`std::fs`] wrappers and assorted filesystem-related helpers. Public to tests as `rfs` to not be