Skip to content

Commit 1a4330d

Browse files
committed
rustc: Handle modules in "fat" LTO more robustly
When performing a "fat" LTO the compiler has a whole mess of codegen units that it links together. To do this it needs to select one module as a "base" module and then link everything else into this module. Previously the LTO passes assumed that there was at least one module in-memory to link into, but nowadays that's not always true! With incremental compilation modules may actually largely be cached and it may be possible that there are no in-memory modules to work with. This commit updates the logic of the LTO backend to handle modules a bit more uniformly during a fat LTO. This commit immediately splits them into two lists, one serialized and one in-memory. The in-memory list is then searched for the largest module and failing that we simply deserialize the first serialized module and link into that. This refactoring avoids juggling three lists, two of which are serialized modules and one of which is half serialized and half in-memory. Closes #63349
1 parent 0396aac commit 1a4330d

File tree

5 files changed

+59
-49
lines changed

5 files changed

+59
-49
lines changed

src/librustc_codegen_llvm/back/lto.rs

+41-45
Original file line numberDiff line numberDiff line change
@@ -183,14 +183,40 @@ pub(crate) fn prepare_thin(
183183

184184
fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
185185
diag_handler: &Handler,
186-
mut modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
186+
modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
187187
cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
188188
mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
189189
symbol_white_list: &[*const libc::c_char])
190190
-> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
191191
{
192192
info!("going for a fat lto");
193193

194+
// Sort out all our lists of incoming modules into two lists.
195+
//
196+
// * `serialized_modules` (also an argument to this function) contains all
197+
// modules that are serialized in-memory.
198+
// * `in_memory` contains modules which are already parsed and in-memory,
199+
// such as from multi-CGU builds.
200+
//
201+
// All of `cached_modules` (cached from previous incremental builds) can
202+
// immediately go onto the `serialized_modules` list and then we can
203+
// split the `modules` array into these two lists.
204+
let mut in_memory = Vec::new();
205+
serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
206+
info!("pushing cached module {:?}", wp.cgu_name);
207+
(buffer, CString::new(wp.cgu_name).unwrap())
208+
}));
209+
for module in modules {
210+
match module {
211+
FatLTOInput::InMemory(m) => in_memory.push(m),
212+
FatLTOInput::Serialized { name, buffer } => {
213+
info!("pushing serialized module {:?}", name);
214+
let buffer = SerializedModule::Local(buffer);
215+
serialized_modules.push((buffer, CString::new(name).unwrap()));
216+
}
217+
}
218+
}
219+
194220
// Find the "costliest" module and merge everything into that codegen unit.
195221
// All the other modules will be serialized and reparsed into the new
196222
// context, so this hopefully avoids serializing and parsing the largest
@@ -200,14 +226,8 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
200226
// file copy operations in the backend work correctly. The only other kind
201227
// of module here should be an allocator one, and if your crate is smaller
202228
// than the allocator module then the size doesn't really matter anyway.
203-
let costliest_module = modules.iter()
229+
let costliest_module = in_memory.iter()
204230
.enumerate()
205-
.filter_map(|(i, module)| {
206-
match module {
207-
FatLTOInput::InMemory(m) => Some((i, m)),
208-
FatLTOInput::Serialized { .. } => None,
209-
}
210-
})
211231
.filter(|&(_, module)| module.kind == ModuleKind::Regular)
212232
.map(|(i, module)| {
213233
let cost = unsafe {
@@ -223,26 +243,14 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
223243
// re-executing the LTO passes. If that's the case deserialize the first
224244
// module and create a linker with it.
225245
let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
226-
Some((_cost, i)) => {
227-
match modules.remove(i) {
228-
FatLTOInput::InMemory(m) => m,
229-
FatLTOInput::Serialized { .. } => unreachable!(),
230-
}
231-
}
246+
Some((_cost, i)) => in_memory.remove(i),
232247
None => {
233-
let pos = modules.iter().position(|m| {
234-
match m {
235-
FatLTOInput::InMemory(_) => false,
236-
FatLTOInput::Serialized { .. } => true,
237-
}
238-
}).expect("must have at least one serialized module");
239-
let (name, buffer) = match modules.remove(pos) {
240-
FatLTOInput::Serialized { name, buffer } => (name, buffer),
241-
FatLTOInput::InMemory(_) => unreachable!(),
242-
};
248+
assert!(serialized_modules.len() > 0, "must have at least one serialized module");
249+
let (buffer, name) = serialized_modules.remove(0);
250+
info!("no in-memory regular modules to choose from, parsing {:?}", name);
243251
ModuleCodegen {
244-
module_llvm: ModuleLlvm::parse(cgcx, &name, &buffer, diag_handler)?,
245-
name,
252+
module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
253+
name: name.into_string().unwrap(),
246254
kind: ModuleKind::Regular,
247255
}
248256
}
@@ -265,25 +273,13 @@ fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
265273
// and we want to move everything to the same LLVM context. Currently the
266274
// way we know of to do that is to serialize them to a string and then parse
267275
// them later. Not great but hey, that's why it's "fat" LTO, right?
268-
let mut new_modules = modules.into_iter().map(|module| {
269-
match module {
270-
FatLTOInput::InMemory(module) => {
271-
let buffer = ModuleBuffer::new(module.module_llvm.llmod());
272-
let llmod_id = CString::new(&module.name[..]).unwrap();
273-
(SerializedModule::Local(buffer), llmod_id)
274-
}
275-
FatLTOInput::Serialized { name, buffer } => {
276-
let llmod_id = CString::new(name).unwrap();
277-
(SerializedModule::Local(buffer), llmod_id)
278-
}
279-
}
280-
}).collect::<Vec<_>>();
276+
for module in in_memory {
277+
let buffer = ModuleBuffer::new(module.module_llvm.llmod());
278+
let llmod_id = CString::new(&module.name[..]).unwrap();
279+
serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
280+
}
281281
// Sort the modules to ensure we produce deterministic results.
282-
new_modules.sort_by(|module1, module2| module1.1.partial_cmp(&module2.1).unwrap());
283-
serialized_modules.extend(new_modules);
284-
serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
285-
(buffer, CString::new(wp.cgu_name).unwrap())
286-
}));
282+
serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
287283

288284
// For all serialized bitcode files we parse them and link them in as we did
289285
// above, this is all mostly handled in C++. Like above, though, we don't
@@ -850,7 +846,7 @@ fn module_name_to_str(c_str: &CStr) -> &str {
850846
bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e))
851847
}
852848

853-
fn parse_module<'a>(
849+
pub fn parse_module<'a>(
854850
cx: &'a llvm::Context,
855851
name: &CStr,
856852
data: &[u8],

src/librustc_codegen_llvm/lib.rs

+4-3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ use syntax_pos::symbol::InternedString;
5454
pub use llvm_util::target_features;
5555
use std::any::Any;
5656
use std::sync::{mpsc, Arc};
57+
use std::ffi::CStr;
5758

5859
use rustc::dep_graph::DepGraph;
5960
use rustc::middle::cstore::{EncodedMetadata, MetadataLoader};
@@ -386,13 +387,13 @@ impl ModuleLlvm {
386387

387388
fn parse(
388389
cgcx: &CodegenContext<LlvmCodegenBackend>,
389-
name: &str,
390-
buffer: &back::lto::ModuleBuffer,
390+
name: &CStr,
391+
buffer: &[u8],
391392
handler: &Handler,
392393
) -> Result<Self, FatalError> {
393394
unsafe {
394395
let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
395-
let llmod_raw = buffer.parse(name, llcx, handler)?;
396+
let llmod_raw = back::lto::parse_module(llcx, name, buffer, handler)?;
396397
let tm = match (cgcx.tm_factory.0)() {
397398
Ok(m) => m,
398399
Err(e) => {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
-include ../tools.mk
2+
3+
all: cdylib-fat cdylib-thin
4+
5+
cdylib-fat:
6+
$(RUSTC) lib.rs -C lto=fat -C opt-level=3 -C incremental=$(TMPDIR)/inc-fat
7+
$(RUSTC) lib.rs -C lto=fat -C opt-level=3 -C incremental=$(TMPDIR)/inc-fat
8+
9+
cdylib-thin:
10+
$(RUSTC) lib.rs -C lto=thin -C opt-level=3 -C incremental=$(TMPDIR)/inc-thin
11+
$(RUSTC) lib.rs -C lto=thin -C opt-level=3 -C incremental=$(TMPDIR)/inc-thin
12+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
#![crate_type = "cdylib"]
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
warning: Linking globals named 'foo': symbol multiply defined!
22

3-
error: failed to load bc of "lto_duplicate_symbols1.3a1fbbbh-cgu.0":
3+
error: failed to load bc of "lto_duplicate_symbols2.3a1fbbbh-cgu.0":
44

55
error: aborting due to previous error
66

0 commit comments

Comments
 (0)