From 9ef7d708b08af2c604e56dffd7b61bed2f378a24 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 29 May 2021 03:18:50 -0700 Subject: [PATCH 1/2] Wrap libraries in linker groups, allowing backwards/circular references Some native library sets, such as components of libc, libgcc, or compiler-builtins, require backwards or circular references. Additionally, because link order doesn't matter for native libraries when using dynamic linking, many crates don't take it into account for static linking either; this can introduce platform-specific issues. Rather than make the order of `#[link]` lines or `cargo:rustc-link-lib` lines semantically significant (and potentially even require specifying some libraries twice), wrap each set of libraries in linker groups. This ensures that symbols in all libraries resolve correctly regardless of library order. Linker groups of reasonable sizes don't have a substantial cost, but putting the entirey of a large link line inside a linker group can add enough cost to be noticeable. To minimize the cost of linker groups, use separate groups for local native libraries, upstream crates, and upstream native libraries. This doesn't allow backreferences between those groups, but in most cases we won't want such backreferences. --- compiler/rustc_codegen_ssa/src/back/link.rs | 91 +++++---------------- 1 file changed, 22 insertions(+), 69 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/back/link.rs b/compiler/rustc_codegen_ssa/src/back/link.rs index 6c9ec9e7b0dae..0451b42183d08 100644 --- a/compiler/rustc_codegen_ssa/src/back/link.rs +++ b/compiler/rustc_codegen_ssa/src/back/link.rs @@ -1810,11 +1810,18 @@ fn linker_with_args<'a, B: ArchiveBuilder<'a>>( add_sanitizer_libraries(sess, crate_type, cmd); // Object code from the current crate. - // Take careful note of the ordering of the arguments we pass to the linker - // here. Linkers will assume that things on the left depend on things to the - // right. Things on the right cannot depend on things on the left. This is - // all formally implemented in terms of resolving symbols (libs on the right - // resolve unknown symbols of libs on the left, but not vice versa). + // + // Some native libraries can have circular dependencies, as do some internal + // crates in the standard library (notably libcore and libstd via "weak lang + // items" like `rust_begin_unwind`). And even when they don't, we don't want + // to force crates to provide native libraries in dependency order. So, we + // provide all libraries within a linker group. This ensures that symbols in + // all libraries resolve correctly regardless of library order. + // + // However, we still take care to pass arguments to the linker in a + // topologically sorted order, because that preserves expected symbol + // resolution ordering between libraries even if multiple libraries may + // provide the same symbol. // // For this reason, we have organized the arguments we pass to the linker as // such: @@ -1856,18 +1863,24 @@ fn linker_with_args<'a, B: ArchiveBuilder<'a>>( // This change is somewhat breaking in practice due to local static libraries being linked // as whole-archive (#85144), so removing whole-archive may be a pre-requisite. if sess.opts.debugging_opts.link_native_libraries { + cmd.group_start(); add_local_native_libraries(cmd, sess, codegen_results); + cmd.group_end(); } // Rust libraries. + cmd.group_start(); add_upstream_rust_crates::(cmd, sess, codegen_results, crate_type, tmpdir); + cmd.group_end(); // Native libraries linked with `#[link]` attributes at and `-l` command line options. // If -Zlink-native-libraries=false is set, then the assumption is that an // external build system already has the native dependencies defined, and it // will provide them to the linker itself. if sess.opts.debugging_opts.link_native_libraries { + cmd.group_start(); add_upstream_native_libraries(cmd, sess, codegen_results, crate_type); + cmd.group_end(); } // Library linking above uses some global state for things like `-Bstatic`/`-Bdynamic` to make @@ -2114,64 +2127,9 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>( // crates. let deps = &codegen_results.crate_info.used_crates_dynamic; - // There's a few internal crates in the standard library (aka libcore and - // libstd) which actually have a circular dependence upon one another. This - // currently arises through "weak lang items" where libcore requires things - // like `rust_begin_unwind` but libstd ends up defining it. To get this - // circular dependence to work correctly in all situations we'll need to be - // sure to correctly apply the `--start-group` and `--end-group` options to - // GNU linkers, otherwise if we don't use any other symbol from the standard - // library it'll get discarded and the whole application won't link. - // - // In this loop we're calculating the `group_end`, after which crate to - // pass `--end-group` and `group_start`, before which crate to pass - // `--start-group`. We currently do this by passing `--end-group` after - // the first crate (when iterating backwards) that requires a lang item - // defined somewhere else. Once that's set then when we've defined all the - // necessary lang items we'll pass `--start-group`. - // - // Note that this isn't amazing logic for now but it should do the trick - // for the current implementation of the standard library. - let mut group_end = None; - let mut group_start = None; - // Crates available for linking thus far. - let mut available = FxHashSet::default(); - // Crates required to satisfy dependencies discovered so far. - let mut required = FxHashSet::default(); - - let info = &codegen_results.crate_info; - for &(cnum, _) in deps.iter().rev() { - if let Some(missing) = info.missing_lang_items.get(&cnum) { - let missing_crates = missing.iter().map(|i| info.lang_item_to_crate.get(i).copied()); - required.extend(missing_crates); - } - - required.insert(Some(cnum)); - available.insert(Some(cnum)); - - if required.len() > available.len() && group_end.is_none() { - group_end = Some(cnum); - } - if required.len() == available.len() && group_end.is_some() { - group_start = Some(cnum); - break; - } - } - - // If we didn't end up filling in all lang items from upstream crates then - // we'll be filling it in with our crate. This probably means we're the - // standard library itself, so skip this for now. - if group_end.is_some() && group_start.is_none() { - group_end = None; - } - let mut compiler_builtins = None; for &(cnum, _) in deps.iter() { - if group_start == Some(cnum) { - cmd.group_start(); - } - // We may not pass all crates through to the linker. Some crates may // appear statically in an existing dylib, meaning we'll pick up all the // symbols from the dylib. @@ -2192,10 +2150,6 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>( } Linkage::Dynamic => add_dynamic_crate(cmd, sess, &src.dylib.as_ref().unwrap().0), } - - if group_end == Some(cnum) { - cmd.group_end(); - } } // compiler-builtins are always placed last to ensure that they're @@ -2363,11 +2317,10 @@ fn add_upstream_native_libraries( codegen_results: &CodegenResults, crate_type: CrateType, ) { - // Be sure to use a topological sorting of crates because there may be - // interdependencies between native libraries. When passing -nodefaultlibs, - // for example, almost all native libraries depend on libc, so we have to - // make sure that's all the way at the right (liblibc is near the base of - // the dependency chain). + // We put all static libraries in a linker group so that later libraries may + // depend on earlier ones, but we still use a topological sorting of crates + // because that preserves expected symbol resolution ordering between + // libraries even if multiple libraries may provide the same symbol. // // This passes RequireStatic, but the actual requirement doesn't matter, // we're just getting an ordering of crate numbers, we're not worried about From 09b4e49b0402e8ad83b4d1f78d36f15884445b23 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Thu, 24 Jun 2021 21:21:27 -0700 Subject: [PATCH 2/2] rustc_codegen_ssa: Drop lang_item_to_crate and missing_lang_items These fields were both only used for library ordering, so they're now both unused. --- compiler/rustc_codegen_ssa/src/base.rs | 16 ---------------- compiler/rustc_codegen_ssa/src/lib.rs | 3 --- 2 files changed, 19 deletions(-) diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index 38ab39febe066..166fe16e1bb6a 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -21,7 +21,6 @@ use rustc_index::vec::Idx; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs; use rustc_middle::middle::cstore::EncodedMetadata; use rustc_middle::middle::cstore::{self, LinkagePreference}; -use rustc_middle::middle::lang_items; use rustc_middle::mir::mono::{CodegenUnit, CodegenUnitNameBuilder, MonoItem}; use rustc_middle::ty::layout::{HasTyCtxt, TyAndLayout}; use rustc_middle::ty::layout::{FAT_PTR_ADDR, FAT_PTR_EXTRA}; @@ -782,12 +781,9 @@ impl CrateInfo { used_crates_dynamic: cstore::used_crates(tcx, LinkagePreference::RequireDynamic), used_crates_static: cstore::used_crates(tcx, LinkagePreference::RequireStatic), used_crate_source: Default::default(), - lang_item_to_crate: Default::default(), - missing_lang_items: Default::default(), dependency_formats: tcx.dependency_formats(()), windows_subsystem, }; - let lang_items = tcx.lang_items(); let crates = tcx.crates(); @@ -795,7 +791,6 @@ impl CrateInfo { info.native_libraries.reserve(n_crates); info.crate_name.reserve(n_crates); info.used_crate_source.reserve(n_crates); - info.missing_lang_items.reserve(n_crates); for &cnum in crates.iter() { info.native_libraries @@ -814,17 +809,6 @@ impl CrateInfo { if tcx.is_no_builtins(cnum) { info.is_no_builtins.insert(cnum); } - let missing = tcx.missing_lang_items(cnum); - for &item in missing.iter() { - if let Ok(id) = lang_items.require(item) { - info.lang_item_to_crate.insert(item, id.krate); - } - } - - // No need to look for lang items that don't actually need to exist. - let missing = - missing.iter().cloned().filter(|&l| lang_items::required(tcx, l)).collect(); - info.missing_lang_items.insert(cnum, missing); } info diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index b6de12fa35e37..a01af5d5730d1 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -22,7 +22,6 @@ use rustc_ast as ast; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::sync::Lrc; use rustc_hir::def_id::CrateNum; -use rustc_hir::LangItem; use rustc_middle::dep_graph::WorkProduct; use rustc_middle::middle::cstore::{self, CrateSource, LibSource}; use rustc_middle::middle::dependency_format::Dependencies; @@ -146,8 +145,6 @@ pub struct CrateInfo { pub used_crate_source: FxHashMap>, pub used_crates_static: Vec<(CrateNum, LibSource)>, pub used_crates_dynamic: Vec<(CrateNum, LibSource)>, - pub lang_item_to_crate: FxHashMap, - pub missing_lang_items: FxHashMap>, pub dependency_formats: Lrc, pub windows_subsystem: Option, }