Skip to content

Commit 99acc6a

Browse files
committed
Auto merge of rust-lang#133250 - DianQK:embed-bitcode-pgo, r=<try>
The embedded bitcode should always be prepared for LTO/ThinLTO. Fixes rust-lang#115344. Fixes rust-lang#117220. There are currently two methods for generating the bitcode that is used for LTO. One method involves using `-C linker-plugin-lto` to emit object files as bitcode, which is the typical setting used by cargo. The other method is through `-C embed-bitcode=yes`. When used with `-C embed-bitcode=yes -C lto=no`, we run a complete non-LTO LLVM pipeline to obtain the bitcode, and that bitcode is then used for LTO. As a result, we run the Call Graph Profile Pass twice on the same module. This PR does something similar to LLVM's `buildFatLTODefaultPipeline`, obtaining the bitcode for embedding after running `buildThinLTOPreLinkDefaultPipeline`. r? nikic
2 parents 5e1440a + e4d3be7 commit 99acc6a

File tree

13 files changed

+280
-71
lines changed

13 files changed

+280
-71
lines changed

compiler/rustc_codegen_cranelift/src/driver/aot.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ fn produce_final_output_artifacts(
204204
// to get rid of it.
205205
for output_type in crate_output.outputs.keys() {
206206
match *output_type {
207-
OutputType::Bitcode | OutputType::ThinLinkBitcode => {
207+
OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => {
208208
// Cranelift doesn't have bitcode
209209
// user_wants_bitcode = true;
210210
// // Copy to .bc, but always keep the .0.bc. There is a later

compiler/rustc_codegen_llvm/src/back/lto.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ use std::ffi::{CStr, CString};
33
use std::fs::File;
44
use std::mem::ManuallyDrop;
55
use std::path::Path;
6+
use std::ptr::NonNull;
67
use std::sync::Arc;
78
use std::{io, iter, slice};
89

@@ -604,7 +605,7 @@ pub(crate) fn run_pass_manager(
604605
debug!("running the pass manager");
605606
let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
606607
let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
607-
unsafe { write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) }?;
608+
unsafe { write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage) }?;
608609
debug!("lto done");
609610
Ok(())
610611
}
@@ -663,6 +664,11 @@ impl ThinBuffer {
663664
ThinBuffer(buffer)
664665
}
665666
}
667+
668+
pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer {
669+
let mut ptr = NonNull::new(ptr).unwrap();
670+
ThinBuffer(unsafe { ptr.as_mut() })
671+
}
666672
}
667673

668674
impl ThinBufferMethods for ThinBuffer {

compiler/rustc_codegen_llvm/src/back/write.rs

+105-43
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use std::ffi::{CStr, CString};
22
use std::io::{self, Write};
33
use std::path::{Path, PathBuf};
4+
use std::ptr::null_mut;
45
use std::sync::Arc;
56
use std::{fs, slice, str};
67

@@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{
1516
TargetMachineFactoryFn,
1617
};
1718
use rustc_codegen_ssa::traits::*;
18-
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen};
19+
use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind};
1920
use rustc_data_structures::profiling::SelfProfilerRef;
2021
use rustc_data_structures::small_c_str::SmallCStr;
2122
use rustc_errors::{DiagCtxtHandle, FatalError, Level};
@@ -41,7 +42,7 @@ use crate::errors::{
4142
WithLlvmError, WriteBytecode,
4243
};
4344
use crate::llvm::diagnostic::OptimizationDiagnosticKind::*;
44-
use crate::llvm::{self, DiagnosticInfo, PassManager};
45+
use crate::llvm::{self, DiagnosticInfo, PassManager, ThinLTOBuffer};
4546
use crate::type_::Type;
4647
use crate::{LlvmCodegenBackend, ModuleLlvm, base, common, llvm_util};
4748

@@ -516,13 +517,24 @@ pub(crate) unsafe fn llvm_optimize(
516517
cgcx: &CodegenContext<LlvmCodegenBackend>,
517518
dcx: DiagCtxtHandle<'_>,
518519
module: &ModuleCodegen<ModuleLlvm>,
520+
thin_lto_buffer: Option<&mut *mut ThinLTOBuffer>,
519521
config: &ModuleConfig,
520522
opt_level: config::OptLevel,
521523
opt_stage: llvm::OptStage,
522524
) -> Result<(), FatalError> {
525+
if thin_lto_buffer.is_some() {
526+
assert!(
527+
matches!(
528+
opt_stage,
529+
llvm::OptStage::PreLinkNoLTO
530+
| llvm::OptStage::PreLinkFatLTO
531+
| llvm::OptStage::PreLinkThinLTO
532+
),
533+
"the bitcode for LTO can only be obtained at the pre-link stage"
534+
);
535+
}
523536
let unroll_loops =
524537
opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
525-
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
526538
let pgo_gen_path = get_pgo_gen_path(config);
527539
let pgo_use_path = get_pgo_use_path(config);
528540
let pgo_sample_use_path = get_pgo_sample_use_path(config);
@@ -582,7 +594,9 @@ pub(crate) unsafe fn llvm_optimize(
582594
config.no_prepopulate_passes,
583595
config.verify_llvm_ir,
584596
config.lint_llvm_ir,
585-
using_thin_buffers,
597+
thin_lto_buffer,
598+
config.emit_thin_lto,
599+
config.emit_thin_lto_summary,
586600
config.merge_functions,
587601
unroll_loops,
588602
config.vectorize_slp,
@@ -637,7 +651,52 @@ pub(crate) unsafe fn optimize(
637651
_ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
638652
_ => llvm::OptStage::PreLinkNoLTO,
639653
};
640-
return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
654+
// The embedded bitcode is used to run LTO/ThinLTO.
655+
// The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO.
656+
// It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at
657+
// this point.
658+
let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular
659+
&& config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full))
660+
|| config.emit_thin_lto_summary
661+
{
662+
Some(null_mut())
663+
} else {
664+
None
665+
};
666+
unsafe {
667+
llvm_optimize(cgcx, dcx, module, thin_lto_buffer.as_mut(), config, opt_level, opt_stage)
668+
}?;
669+
if let Some(thin_lto_buffer) = thin_lto_buffer {
670+
let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) };
671+
let thin_bc_out =
672+
if let Some(incr_comp_session_dir) = cgcx.incr_comp_session_dir.as_ref() {
673+
incr_comp_session_dir.join(pre_lto_embed_bitcode_filename(module_name.unwrap()))
674+
} else {
675+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name)
676+
};
677+
if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) {
678+
dcx.emit_err(WriteBytecode { path: &thin_bc_out, err });
679+
}
680+
let bc_summary_out =
681+
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
682+
if config.emit_thin_lto_summary
683+
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
684+
{
685+
let summary_data = thin_lto_buffer.thin_link_data();
686+
cgcx.prof.artifact_size(
687+
"llvm_bitcode_summary",
688+
thin_link_bitcode_filename.to_string_lossy(),
689+
summary_data.len() as u64,
690+
);
691+
let _timer = cgcx.prof.generic_activity_with_arg(
692+
"LLVM_module_codegen_emit_bitcode_summary",
693+
&*module.name,
694+
);
695+
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
696+
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
697+
}
698+
}
699+
}
641700
}
642701
Ok(())
643702
}
@@ -716,61 +775,60 @@ pub(crate) unsafe fn codegen(
716775
// requested.
717776
// - If we don't have the integrated assembler then we need to emit
718777
// asm from LLVM and use `gcc` to create the object file.
719-
720778
let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name);
721-
let bc_summary_out =
722-
cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name);
723779
let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name);
724780

725781
if config.bitcode_needed() {
726-
let _timer = cgcx
727-
.prof
728-
.generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name);
729-
let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary);
730-
let data = thin.data();
731-
732-
if let Some(bitcode_filename) = bc_out.file_name() {
733-
cgcx.prof.artifact_size(
734-
"llvm_bitcode",
735-
bitcode_filename.to_string_lossy(),
736-
data.len() as u64,
737-
);
738-
}
739-
740-
if config.emit_thin_lto_summary
741-
&& let Some(thin_link_bitcode_filename) = bc_summary_out.file_name()
742-
{
743-
let summary_data = thin.thin_link_data();
744-
cgcx.prof.artifact_size(
745-
"llvm_bitcode_summary",
746-
thin_link_bitcode_filename.to_string_lossy(),
747-
summary_data.len() as u64,
748-
);
749-
750-
let _timer = cgcx.prof.generic_activity_with_arg(
751-
"LLVM_module_codegen_emit_bitcode_summary",
752-
&*module.name,
753-
);
754-
if let Err(err) = fs::write(&bc_summary_out, summary_data) {
755-
dcx.emit_err(WriteBytecode { path: &bc_summary_out, err });
756-
}
757-
}
758-
782+
// If the object file of the target spec is bitcode, what happens when performing LTO in Rust?
759783
if config.emit_bc || config.emit_obj == EmitObj::Bitcode {
784+
let thin = {
785+
let _timer = cgcx.prof.generic_activity_with_arg(
786+
"LLVM_module_codegen_make_bitcode",
787+
&*module.name,
788+
);
789+
ThinBuffer::new(llmod, config.emit_thin_lto, false)
790+
};
791+
let data = thin.data();
760792
let _timer = cgcx
761793
.prof
762794
.generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name);
795+
if let Some(bitcode_filename) = bc_out.file_name() {
796+
cgcx.prof.artifact_size(
797+
"llvm_bitcode",
798+
bitcode_filename.to_string_lossy(),
799+
data.len() as u64,
800+
);
801+
}
763802
if let Err(err) = fs::write(&bc_out, data) {
764803
dcx.emit_err(WriteBytecode { path: &bc_out, err });
765804
}
766805
}
767806

768-
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) {
807+
if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)
808+
&& module.kind == ModuleKind::Regular
809+
{
769810
let _timer = cgcx
770811
.prof
771812
.generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name);
813+
let thin_bc_out = if let Some(incr_comp_session_dir) =
814+
cgcx.incr_comp_session_dir.as_ref()
815+
{
816+
incr_comp_session_dir.join(pre_lto_embed_bitcode_filename(module_name.unwrap()))
817+
} else {
818+
cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name)
819+
};
820+
assert!(
821+
thin_bc_out.exists(),
822+
"cannot find {} as embedded bitcode",
823+
thin_bc_out.display()
824+
);
825+
let data = fs::read(&thin_bc_out).unwrap();
826+
if cgcx.incr_comp_session_dir.is_none() {
827+
debug!("removing embed bitcode file {:?}", thin_bc_out);
828+
ensure_removed(dcx, &thin_bc_out);
829+
}
772830
unsafe {
773-
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data);
831+
embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data);
774832
}
775833
}
776834
}
@@ -1136,3 +1194,7 @@ fn record_llvm_cgu_instructions_stats(prof: &SelfProfilerRef, llmod: &llvm::Modu
11361194
serde_json::from_str(&raw_stats).expect("cannot parse llvm cgu instructions stats");
11371195
prof.artifact_size("cgu_instructions", module, total);
11381196
}
1197+
1198+
fn pre_lto_embed_bitcode_filename(module_name: &str) -> String {
1199+
format!("{module_name}.{}", OutputType::ThinBitcode.extension())
1200+
}

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -2251,7 +2251,9 @@ unsafe extern "C" {
22512251
NoPrepopulatePasses: bool,
22522252
VerifyIR: bool,
22532253
LintIR: bool,
2254-
UseThinLTOBuffers: bool,
2254+
ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>,
2255+
EmitThinLTO: bool,
2256+
EmitThinLTOSummary: bool,
22552257
MergeFunctions: bool,
22562258
UnrollLoops: bool,
22572259
SLPVectorize: bool,

compiler/rustc_codegen_ssa/src/back/write.rs

+3
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,9 @@ fn produce_final_output_artifacts(
616616
// them for making an rlib.
617617
copy_if_one_unit(OutputType::Bitcode, true);
618618
}
619+
OutputType::ThinBitcode => {
620+
copy_if_one_unit(OutputType::ThinBitcode, true);
621+
}
619622
OutputType::ThinLinkBitcode => {
620623
copy_if_one_unit(OutputType::ThinLinkBitcode, false);
621624
}

0 commit comments

Comments
 (0)