Skip to content

Commit 32549d4

Browse files
committed
Add the unstable option to reduce the binary size of dynamic library based on service requirements
1 parent 317d14a commit 32549d4

File tree

6 files changed

+232
-5
lines changed

6 files changed

+232
-5
lines changed

compiler/rustc_interface/src/tests.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ use rustc_session::config::{
88
FunctionReturn, InliningThreshold, Input, InstrumentCoverage, InstrumentXRay,
99
LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, MirSpanview, OomStrategy, Options,
1010
OutFileName, OutputType, OutputTypes, PAuthKey, PacRet, Passes, Polonius,
11-
ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingVersion, TraitSolver,
12-
WasiExecModel,
11+
ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingPlugin, SymbolManglingVersion,
12+
TraitSolver, WasiExecModel,
1313
};
1414
use rustc_session::lint::Level;
1515
use rustc_session::search_paths::SearchPath;
@@ -818,6 +818,7 @@ fn test_unstable_options_tracking_hash() {
818818
tracked!(split_lto_unit, Some(true));
819819
tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1));
820820
tracked!(stack_protector, StackProtector::All);
821+
tracked!(symbol_mangling_plugin, SymbolManglingPlugin::new());
821822
tracked!(teach, true);
822823
tracked!(thinlto, Some(true));
823824
tracked!(thir_unsafeck, true);

compiler/rustc_session/src/config.rs

+131-3
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,15 @@ use rustc_target::abi::Align;
2121
use rustc_target::spec::LinkSelfContainedComponents;
2222
use rustc_target::spec::{PanicStrategy, RelocModel, SanitizerSet, SplitDebuginfo};
2323
use rustc_target::spec::{Target, TargetTriple, TargetWarnings, TARGETS};
24+
use std::cmp::PartialEq;
2425
use std::collections::btree_map::{
2526
Iter as BTreeMapIter, Keys as BTreeMapKeysIter, Values as BTreeMapValuesIter,
2627
};
2728
use std::collections::{BTreeMap, BTreeSet};
2829
use std::ffi::OsStr;
2930
use std::fmt;
3031
use std::fs;
31-
use std::hash::Hash;
32+
use std::hash::{Hash, Hasher};
3233
use std::iter;
3334
use std::path::{Path, PathBuf};
3435
use std::str::{self, FromStr};
@@ -364,6 +365,126 @@ pub enum SymbolManglingVersion {
364365
V0,
365366
}
366367

368+
#[derive(Clone, Debug)]
369+
pub struct SymbolManglingPlugin {
370+
fulls: FxHashSet<String>,
371+
prefixes: Vec<String>,
372+
salt: String,
373+
level: u8,
374+
excluded: bool,
375+
}
376+
377+
impl SymbolManglingPlugin {
378+
pub fn new() -> Self {
379+
Self {
380+
fulls: FxHashSet::default(),
381+
prefixes: Vec::new(),
382+
salt: String::new(),
383+
level: 2,
384+
excluded: false,
385+
}
386+
}
387+
388+
pub fn enabled(&self) -> bool {
389+
!self.fulls.is_empty() || !self.prefixes.is_empty() || self.excluded
390+
}
391+
392+
pub fn hasher_enable(&mut self, args: &str) -> bool {
393+
let cloned = self.clone();
394+
if self.hasher_reinit(args) {
395+
return true;
396+
}
397+
self.fulls = cloned.fulls;
398+
self.prefixes = cloned.prefixes;
399+
self.level = cloned.level;
400+
self.salt = cloned.salt;
401+
self.excluded = cloned.excluded;
402+
false
403+
}
404+
405+
pub fn hasher_args(&self) -> (&str, u8) {
406+
(&self.salt, self.level)
407+
}
408+
409+
pub fn hasher_contains(&self, val: &str) -> bool {
410+
if self.fulls.contains(val) {
411+
return self.excluded ^ true;
412+
}
413+
for prefix in self.prefixes.iter() {
414+
if val.starts_with(prefix) {
415+
return self.excluded ^ true;
416+
}
417+
}
418+
self.excluded ^ false
419+
}
420+
421+
fn hasher_reinit(&mut self, args: &str) -> bool {
422+
for arg in args.split(',') {
423+
let mut it = arg.split('=');
424+
let Some(name) = it.next() else { continue; };
425+
if let Some(value) = it.next() {
426+
match name {
427+
"salt" => self.salt = value.to_string(),
428+
"level" => {
429+
match value {
430+
"1" => self.level = 1,
431+
"2" => self.level = 2,
432+
_ => return false,
433+
}
434+
},
435+
"excluded" => {
436+
match value {
437+
"true" => self.excluded = true,
438+
"false" => self.excluded = false,
439+
_ => return false,
440+
}
441+
},
442+
_ => return false,
443+
}
444+
} else if name.ends_with("*") {
445+
let _ = self.prefixes.push(name[..name.len() - 1].to_string());
446+
} else {
447+
let _ = self.fulls.insert(name.to_string());
448+
}
449+
}
450+
true
451+
}
452+
453+
fn to_vec(&self) -> Vec<&str> {
454+
let mut ret = Vec::with_capacity(self.fulls.len() + self.prefixes.len());
455+
#[allow(rustc::potential_query_instability)]
456+
self.fulls.iter().for_each(|val| ret.push(val.as_str()));
457+
ret.sort();
458+
self.prefixes.iter().for_each(|val| ret.push(val.as_str()));
459+
ret[self.fulls.len()..].sort();
460+
ret
461+
}
462+
}
463+
464+
impl Hash for SymbolManglingPlugin {
465+
fn hash<H>(&self, hasher: &mut H) where H: Hasher {
466+
for val in self.to_vec() {
467+
val.hash(hasher);
468+
}
469+
self.fulls.len().hash(hasher);
470+
self.prefixes.len().hash(hasher);
471+
self.salt.hash(hasher);
472+
self.level.hash(hasher);
473+
self.excluded.hash(hasher);
474+
}
475+
}
476+
477+
impl PartialEq for SymbolManglingPlugin {
478+
fn eq(&self, other: &Self) -> bool {
479+
self.excluded == other.excluded &&
480+
self.level == other.level &&
481+
self.salt == other.salt &&
482+
self.fulls.len() == other.fulls.len() &&
483+
self.prefixes.len() == other.prefixes.len() &&
484+
self.to_vec() == other.to_vec()
485+
}
486+
}
487+
367488
#[derive(Clone, Copy, Debug, PartialEq, Hash)]
368489
pub enum DebugInfo {
369490
None,
@@ -2717,6 +2838,12 @@ pub fn build_session_options(
27172838
);
27182839
}
27192840

2841+
if unstable_opts.symbol_mangling_plugin.enabled() {
2842+
handler.early_error(
2843+
"option `-C instrument-coverage` is not compatible with either `-Z symbol_mangling_plugin`"
2844+
);
2845+
}
2846+
27202847
// `-C instrument-coverage` implies `-C symbol-mangling-version=v0` - to ensure consistent
27212848
// and reversible name mangling. Note, LLVM coverage tools can analyze coverage over
27222849
// multiple runs, including some changes to source code; so mangled names must be consistent
@@ -3176,8 +3303,8 @@ pub(crate) mod dep_tracking {
31763303
ErrorOutputType, FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay,
31773304
LinkerPluginLto, LocationDetail, LtoCli, OomStrategy, OptLevel, OutFileName, OutputType,
31783305
OutputTypes, Polonius, RemapPathScopeComponents, ResolveDocLinks, SourceFileHashAlgorithm,
3179-
SplitDwarfKind, SwitchWithOptPath, SymbolManglingVersion, TraitSolver, TrimmedDefPaths,
3180-
WasiExecModel,
3306+
SplitDwarfKind, SwitchWithOptPath, SymbolManglingPlugin, SymbolManglingVersion, TraitSolver,
3307+
TrimmedDefPaths, WasiExecModel,
31813308
};
31823309
use crate::lint;
31833310
use crate::utils::NativeLib;
@@ -3268,6 +3395,7 @@ pub(crate) mod dep_tracking {
32683395
SplitDwarfKind,
32693396
StackProtector,
32703397
SwitchWithOptPath,
3398+
SymbolManglingPlugin,
32713399
SymbolManglingVersion,
32723400
RemapPathScopeComponents,
32733401
SourceFileHashAlgorithm,

compiler/rustc_session/src/options.rs

+18
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,7 @@ mod desc {
402402
pub const parse_switch_with_opt_path: &str =
403403
"an optional path to the profiling data output directory";
404404
pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
405+
pub const parse_symbol_mangling_plugin: &str = "configuration parameters of the hasher plugin: `hasher:<crate>[*],...[,salt=<value>][,level=1|2][,excluded=true|false]`";
405406
pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
406407
pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
407408
pub const parse_relocation_model: &str =
@@ -1171,6 +1172,21 @@ mod parse {
11711172
true
11721173
}
11731174

1175+
pub(crate) fn parse_symbol_mangling_plugin(
1176+
slot: &mut SymbolManglingPlugin,
1177+
v: Option<&str>,
1178+
) -> bool {
1179+
if let Some(v) = v {
1180+
// only support hasher
1181+
let plugin = "hasher:";
1182+
if v.starts_with(plugin) {
1183+
return slot.hasher_enable(&v[plugin.len()..]);
1184+
}
1185+
return false;
1186+
}
1187+
true
1188+
}
1189+
11741190
pub(crate) fn parse_src_file_hash(
11751191
slot: &mut Option<SourceFileHashAlgorithm>,
11761192
v: Option<&str>,
@@ -1880,6 +1896,8 @@ written to standard error output)"),
18801896
"prefer dynamic linking to static linking for staticlibs (default: no)"),
18811897
strict_init_checks: bool = (false, parse_bool, [TRACKED],
18821898
"control if mem::uninitialized and mem::zeroed panic on more UB"),
1899+
symbol_mangling_plugin: SymbolManglingPlugin = (SymbolManglingPlugin::new(), parse_symbol_mangling_plugin, [TRACKED],
1900+
"the hasher plugin controls symbol scope and hash parameter that require hash (default: hash is not required)"),
18831901
#[rustc_lint_opt_deny_field_access("use `Session::teach` instead of this field")]
18841902
teach: bool = (false, parse_bool, [TRACKED],
18851903
"show extended diagnostic help (default: no)"),

compiler/rustc_symbol_mangling/src/lib.rs

+7
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ use rustc_session::config::SymbolManglingVersion;
114114

115115
mod legacy;
116116
mod v0;
117+
mod plugin;
117118

118119
pub mod errors;
119120
pub mod test;
@@ -268,6 +269,12 @@ fn compute_symbol_name<'tcx>(
268269
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
269270
};
270271

272+
let symbol = if tcx.sess.opts.unstable_opts.symbol_mangling_plugin.enabled() {
273+
plugin::process(tcx, symbol, def_id)
274+
} else {
275+
symbol
276+
};
277+
271278
debug_assert!(
272279
rustc_demangle::try_demangle(&symbol).is_ok(),
273280
"compute_symbol_name: `{symbol}` cannot be demangled"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
2+
use rustc_hir::def_id::DefId;
3+
use rustc_middle::ty::TyCtxt;
4+
5+
pub(super) fn process<'tcx>(
6+
tcx: TyCtxt<'tcx>,
7+
symbol: String,
8+
def_id: DefId,
9+
) -> String {
10+
let crate_name = tcx.crate_name(def_id.krate);
11+
let crate_name = crate_name.as_str();
12+
let symbol_mangling_plugin = &tcx.sess.opts.unstable_opts.symbol_mangling_plugin;
13+
if !symbol_mangling_plugin.hasher_contains(crate_name) {
14+
return symbol;
15+
}
16+
17+
let (salt, level) = symbol_mangling_plugin.hasher_args();
18+
19+
let hash = tcx.with_stable_hashing_context(|mut hcx| {
20+
let mut hasher = StableHasher::new();
21+
symbol.hash_stable(&mut hcx, &mut hasher);
22+
salt.hash_stable(&mut hcx, &mut hasher);
23+
hasher.finish::<Hash64>().as_u64()
24+
});
25+
26+
match level {
27+
1 => encode_1(tcx, crate_name, hash, def_id),
28+
_ => encode_2(tcx, crate_name, hash, def_id),
29+
}
30+
}
31+
32+
fn encode_1<'tcx>(
33+
tcx: TyCtxt<'tcx>,
34+
crate_name: &str,
35+
hash: u64,
36+
def_id: DefId,
37+
) -> String {
38+
if let Some(item_name) = tcx.opt_item_name(def_id) {
39+
let item_name = item_name.as_str();
40+
format!("_ZN{}{crate_name}.{item_name}.{:08x}E", crate_name.len() + item_name.len() + 11, hash >> 8)
41+
} else {
42+
encode_2(tcx, crate_name, hash, def_id)
43+
}
44+
}
45+
46+
fn encode_2<'tcx>(
47+
_tcx: TyCtxt<'tcx>,
48+
crate_name: &str,
49+
hash: u64,
50+
_def_id: DefId,
51+
) -> String {
52+
format!("_ZN{}{crate_name}.{hash:016x}E", crate_name.len() + 18)
53+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# `symbol_mangling_plugin`
2+
3+
Instead of defining a new mangling scheme, this option provides a plug-in that post-processes already mangled symbol names.
4+
5+
The average length of symbol names in the Rust standard library is about 100 bytes, while the average length of symbol names in the C++ standard library is about 65 bytes. In some embedded environments where dynamic libraries are widely used, the space taken by the symbol names of Rust dynamic libraries has become one of the key bottlenecks of an application. The plug-in mechanism provided here can help eliminate this bottleneck.
6+
7+
The plug-in information is not written into the generated binary file. Therefore, you need to ensure that the plug-in configuration is consistent in multiple build environments. For example, the configuration parameters of the plug-in must be consistent in the build project of the dynamic library and the build project that depends on the dynamic library. Otherwise, an `undefined symbol` or `undefined version` error occurs.
8+
9+
The value of this parameter is in the format of `-Z symbol_mangling_plugin=<plugin name>:<plugin arguments>`. Currently only one plug-in is available: `hasher`.
10+
11+
## Hasher plug-in
12+
13+
The configuration format is `-Z symbol_mangling_plugin=hasher:<crate>[*],...[,excluded=true|false][,level=1|2][,salt=<value>]`.
14+
15+
In the preceding format, `<crate>` matches the name of a crate. If the name ends with `*`, it is matched as a prefix. The hasher plug-in only reprocesses the symbol names in (or not in, if `excluded=true`) the specified crates. The hasher plug-in replaces the complete symbol name with a hash value, compressing the symbol name space while avoiding symbol conflicts.
16+
17+
If `level=1`, the new symbol name format is `_ZN{length}{crate}.{item}.{hash32}E`. Otherwise, the new symbol name format is `_ZN{length}{crate}.{hash64}E`, which is the default format.
18+
19+
`salt` can specify a salt value for hash calculation, which reduces security risks caused by malicious replacement of dynamic libraries and increases security.
20+

0 commit comments

Comments
 (0)