Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_llvm/src/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
to_add.push(uwtable_attr(cx.llcx, sess.opts.unstable_opts.use_sync_unwind));
}

if sess.opts.unstable_opts.profile_sample_use.is_some() {
if sess.opts.cg.profile_sample_use.is_some() {
to_add.push(llvm::CreateAttrString(cx.llcx, "use-sample-profile"));
}

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/back/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ impl<'a> GccLinker<'a> {
config::OptLevel::Aggressive => "O3",
};

if let Some(path) = &self.sess.opts.unstable_opts.profile_sample_use {
if let Some(path) = &self.sess.opts.cg.profile_sample_use {
self.link_arg(&format!("-plugin-opt=sample-profile={}", path.display()));
};
let prefix = if self.codegen_backend == "gcc" {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_ssa/src/back/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,8 +169,8 @@ impl ModuleConfig {
SwitchWithOptPath::Disabled
),
pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None),
pgo_sample_use: if_regular!(sess.opts.unstable_opts.profile_sample_use.clone(), None),
debug_info_for_profiling: sess.opts.unstable_opts.debug_info_for_profiling,
pgo_sample_use: if_regular!(sess.opts.cg.profile_sample_use.clone(), None),
debug_info_for_profiling: sess.opts.cg.debuginfo_for_profiling,
instrument_coverage: if_regular!(sess.instrument_coverage(), false),

sanitizer: if_regular!(sess.sanitizers(), SanitizerSet::empty()),
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_interface/src/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,7 @@ fn write_out_deps(tcx: TyCtxt<'_>, outputs: &OutputFilenames, out_filenames: &[P
checksum_hash_algo,
));
}
if let Some(ref profile_sample) = sess.opts.unstable_opts.profile_sample_use {
if let Some(ref profile_sample) = sess.opts.cg.profile_sample_use {
files.extend(hash_iter_files(
iter::once(normalize_path(profile_sample.as_path().to_path_buf())),
checksum_hash_algo,
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_interface/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@ fn test_codegen_options_tracking_hash() {
tracked!(control_flow_guard, CFGuard::Checks);
tracked!(debug_assertions, Some(true));
tracked!(debuginfo, DebugInfo::Limited);
tracked!(debuginfo_for_profiling, true);
tracked!(dwarf_version, Some(5));
tracked!(embed_bitcode, false);
tracked!(force_frame_pointers, FramePointer::Always);
Expand All @@ -634,6 +635,7 @@ fn test_codegen_options_tracking_hash() {
tracked!(passes, vec![String::from("1"), String::from("2")]);
tracked!(prefer_dynamic, true);
tracked!(profile_generate, SwitchWithOptPath::Enabled(None));
tracked!(profile_sample_use, Some(PathBuf::from("abc")));
tracked!(profile_use, Some(PathBuf::from("abc")));
tracked!(relocation_model, Some(RelocModel::Pic));
tracked!(relro_level, Some(RelroLevel::Full));
Expand Down Expand Up @@ -785,7 +787,6 @@ fn test_unstable_options_tracking_hash() {
);
tracked!(crate_attr, vec!["abc".to_string()]);
tracked!(cross_crate_inline_threshold, InliningThreshold::Always);
tracked!(debug_info_for_profiling, true);
tracked!(debug_info_type_line_numbers, true);
tracked!(default_visibility, Some(rustc_target::spec::SymbolVisibility::Hidden));
tracked!(dep_info_omit_d_target, true);
Expand Down Expand Up @@ -849,7 +850,6 @@ fn test_unstable_options_tracking_hash() {
tracked!(plt, Some(true));
tracked!(polonius, Polonius::Legacy);
tracked!(precise_enum_drop_elaboration, false);
tracked!(profile_sample_use, Some(PathBuf::from("abc")));
tracked!(profiler_runtime, "abc".to_string());
tracked!(reg_struct_return, true);
tracked!(regparm, Some(3));
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_session/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2513,11 +2513,11 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M
early_dcx.early_fatal("options `-C profile-generate` and `-C profile-use` are exclusive");
}

if unstable_opts.profile_sample_use.is_some()
if cg.profile_sample_use.is_some()
&& (cg.profile_generate.enabled() || cg.profile_use.is_some())
{
early_dcx.early_fatal(
"option `-Z profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`",
"option `-C profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`",
);
}

Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_session/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,8 @@ options! {
debuginfo: DebugInfo = (DebugInfo::None, parse_debuginfo, [TRACKED],
"debug info emission level (0-2, none, line-directives-only, \
line-tables-only, limited, or full; default: 0)"),
debuginfo_for_profiling: bool = (false, parse_bool, [TRACKED],
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We typically want the stabilization PR to be just the stabilization. You can do the rename (a trivial change) separately and then just rebase here.

Copy link
Copy Markdown
Contributor Author

@zamazan4ik zamazan4ik May 6, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did it this way because the same approach was used in another stabilization PR, #145974, which turned -Zno-jump-tables into -Cjump-tables=bool.

Just to be sure that I understood you properly, here is what I think I need to do:

  • Create another PR into Rust Nightly, where I do only renaming -Zdebug-info-for-profiling into -Zdebuginfo-for-profiling. Do I need to update corresponding Unstable documentation for the flag as well?
  • Do I need to wait until the renaming PR is merged? If yes, we create a lag between renaming the flag and the actual stabilization, which potentially introduces a bit of additional hassle for Rust-for-Linux at the very least, since they will need to change their scripts from the old unstable flag to a new unstable flag, and then once again from the new unstable flag to the stable one. However, maybe it's not a big deal anyway. If not, should these two PRs be merged at roughly the same time? I don't know how quickly even such "small" flag-renaming PRs are reviewed and merged in practice.

I am not against the proposed way - just wanna be extremely clear to not create a mess with separation and going back-and-forth with all of these things.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It creates some churn, but that is what you get when you use nightly.

Note that this PR needs FCP (so, a vote by the compiler team) that takes at least 10 days, the rename is cosmetic and so will easily be merged before that.

So yes you create another PR with a commit that does the rename (just search and replace across the code base). Perhaps you can add a run-make test as discussed below as a separate commit to the same PR.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, thanks!

"emit extra debug info to make sample profile more accurate"),
default_linker_libraries: bool = (false, parse_bool, [UNTRACKED],
"allow the linker to link its default libraries (default: no)"),
dlltool: Option<PathBuf> = (None, parse_opt_pathbuf, [UNTRACKED],
Expand Down Expand Up @@ -2140,6 +2142,8 @@ options! {
profile_generate: SwitchWithOptPath = (SwitchWithOptPath::Disabled,
parse_switch_with_opt_path, [TRACKED],
"compile the program with profiling instrumentation"),
profile_sample_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
"use the given `.prof` file for sample-based profile-guided optimization"),
profile_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
"use the given `.profdata` file for profile-guided optimization"),
#[rustc_lint_opt_deny_field_access("use `Session::relocation_model` instead of this field")]
Expand Down Expand Up @@ -2252,8 +2256,6 @@ options! {
"inject the given attribute in the crate"),
cross_crate_inline_threshold: InliningThreshold = (InliningThreshold::Sometimes(100), parse_inlining_threshold, [TRACKED],
"threshold to allow cross crate inlining of functions"),
debug_info_for_profiling: bool = (false, parse_bool, [TRACKED],
"emit discriminators and other data necessary for AutoFDO"),
debug_info_type_line_numbers: bool = (false, parse_bool, [TRACKED],
"emit type and line information for additional data types (default: no)"),
debuginfo_compression: DebugInfoCompression = (DebugInfoCompression::None, parse_debuginfo_compression, [TRACKED],
Expand Down Expand Up @@ -2560,8 +2562,6 @@ options! {
"how to run proc-macro code (default: same-thread)"),
profile_closures: bool = (false, parse_no_value, [UNTRACKED],
"profile size of closures"),
profile_sample_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
"use the given `.prof` file for sampled profile-guided optimization (also known as AutoFDO)"),
profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED],
"name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"),
query_dep_graph: bool = (false, parse_bool, [UNTRACKED],
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_session/src/session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1150,7 +1150,7 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
}

// Do the same for sample profile data.
if let Some(ref path) = sess.opts.unstable_opts.profile_sample_use {
if let Some(ref path) = sess.opts.cg.profile_sample_use {
if !path.exists() {
sess.dcx().emit_err(errors::ProfileSampleUseFileDoesNotExist { path });
}
Expand Down
11 changes: 11 additions & 0 deletions src/doc/rustc/src/codegen-options/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ following values:

Note: The [`-g` flag][option-g-debug] is an alias for `-C debuginfo=2`.

## debuginfo-for-profiling

Emit extra debug info (currently [DWARF discriminators](https://llvm.org/doxygen/AddDiscriminators_8cpp.html)) to make sample profiles more accurate. See the chapter on [profile-guided optimization] for more information.

## default-linker-libraries

This flag controls whether or not the linker includes its default libraries.
Expand Down Expand Up @@ -539,6 +543,13 @@ an optional argument which is the path to a directory into which the
instrumented binary will emit the collected data. See the chapter on
[profile-guided optimization] for more information.

## profile-sample-use

This flag specifies the profiling data file to be used for sample-based
profile-guided optimization (SPGO). The flag takes a mandatory argument which is
the path to a valid `.prof` file. See the chapter on [profile-guided optimization]
for more information.

## profile-use

This flag specifies the profiling data file to be used for profile-guided
Expand Down
98 changes: 86 additions & 12 deletions src/doc/rustc/src/profile-guided-optimization.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,35 @@ This chapter describes what PGO is, what it is good for, and how it can be used.
The basic concept of PGO is to collect data about the typical execution of
a program (e.g. which branches it is likely to take) and then use this data
to inform optimizations such as inlining, machine-code layout,
register allocation, etc.
register allocation, etc. Optimization levels `-Copt-level=2` and above are
recommended when using profile-guided optimization.

`rustc` supports profile-guided optimization with two different kinds of profiling. A sampling profiler can generate a profile with very low runtime overhead, or you can build an instrumented version of the code that collects more detailed profile information. Both kinds of profiles can provide execution counts for instructions in the code and information on branches taken and function invocation.

There are different ways of collecting data about a program's execution.
One is to run the program inside a profiler (such as `perf`) and another
is to create an instrumented binary, that is, a binary that has data
collection built into it, and run that.
The latter usually provides more accurate data and it is also what is
supported by `rustc`.

## Usage
## Differences Between Instrumentation and Sampling

Although both techniques are used for similar purposes, there are important differences between the two:

1. Profile data generated with one cannot be used by the other, and there is no conversion tool that can convert one to the other. So, a profile generated via `-Cprofile-generate` must be used with `-Cprofile-use`. Similarly, sampling profiles generated by external profilers must be converted and used with `-Cprofile-sample-use`.
2. Sampling profiles must be generated by an external tool. The profile generated by that tool must then be converted into a format that can be read by LLVM. The section on sampling profilers describes one of the supported sampling profile formats.

## Profiling with Instrumentation

Generating a PGO-optimized program involves following a workflow with four steps:

1. Compile the program with instrumentation enabled
(e.g. `rustc -Cprofile-generate=/tmp/pgo-data main.rs`)
(e.g. `rustc -Cprofile-generate=/tmp/pgo-data -O main.rs`)
2. Run the instrumented program (e.g. `./main`) which generates a
`default_<id>.profraw` file
3. Convert the `.profraw` file into a `.profdata` file using
LLVM's `llvm-profdata` tool
4. Compile the program again, this time making use of the profiling data
(for example `rustc -Cprofile-use=merged.profdata main.rs`)
(for example `rustc -Cprofile-use=merged.profdata -O main.rs`)

An instrumented program will create one or more `.profraw` files, one for each
instrumented binary. E.g. an instrumented executable that loads two instrumented
Expand Down Expand Up @@ -60,7 +68,7 @@ The `llvm-profdata` tool merges multiple `.profraw` files into a single

```bash
# STEP 1: Compile the binary with instrumentation
rustc -Cprofile-generate=/tmp/pgo-data -O ./main.rs
rustc -Cprofile-generate=/tmp/pgo-data -O main.rs

# STEP 2: Run the binary a few times, maybe with common sets of args.
# Each run will create or update `.profraw` files in /tmp/pgo-data
Expand All @@ -69,11 +77,11 @@ rustc -Cprofile-generate=/tmp/pgo-data -O ./main.rs
./main mydata3.csv

# STEP 3: Merge and post-process all the `.profraw` files in /tmp/pgo-data
llvm-profdata merge -o ./merged.profdata /tmp/pgo-data
llvm-profdata merge -o merged.profdata /tmp/pgo-data

# STEP 4: Use the merged `.profdata` file during optimization. All `rustc`
# flags have to be the same.
rustc -Cprofile-use=./merged.profdata -O ./main.rs
rustc -Cprofile-use=./merged.profdata -O main.rs
```

### A Complete Cargo Workflow
Expand Down Expand Up @@ -136,11 +144,77 @@ RUSTFLAGS="-Cprofile-use=/tmp/pgo-data/merged.profdata" \
Cargo prior to version 1.39 that will prevent PGO from working correctly. Be
sure to use Cargo 1.39 or newer when doing PGO.

## Profiling with Sampling

Sampling profilers are used to collect runtime information, such as hardware counters, while your application executes. They are typically very efficient and do not incur a large runtime overhead. The sample data collected by the profiler can be used during compilation to determine what the most executed areas of the code are.

Using the data from a sample profiler requires some changes in the way a program is built. Before the compiler can use profiling information, the code needs to execute under the profiler. The following is the usual build cycle when using sample profilers for optimization:

1. Build the code with source line table information. You can use all the usual build flags that you always build your application with. The only requirement is that DWARF debug info including source line information is generated. This DWARF information is important for the profiler to be able to map instructions back to source line locations. The usefulness of this DWARF information can be improved with the `-Cdebuginfo-for-profiling` option. For example:

```bash
rustc -Cdebuginfo=line-tables-only -Cdebuginfo-for-profiling -O main.rs
```

2. Run the executable under a sampling profiler. The specific profiler you use does not really matter, as long as its output can be converted into the format that the LLVM optimizer understands.

Two such profilers are the Linux Perf [profiler](https://perf.wiki.kernel.org/) and Intel’s Sampling Enabling Product (SEP), available as part of Intel VTune. While Perf is Linux-specific, SEP can be used on Linux, Windows, and FreeBSD.

The LLVM tool `llvm-profgen` can convert the output of either Perf or SEP. An external project, [AutoFDO](https://github.com/google/autofdo), also provides a `create_llvm_prof` tool which supports Linux Perf output.

When using Perf:

```bash
perf record -b -e BR_INST_RETIRED.NEAR_TAKEN:uppp ./main
```

If the event above is unavailable, `branches:u` is probably the next-best choice.

Note the use of the `-b` flag. This tells Perf to use the Last Branch Record (LBR) to record call chains. While this is not strictly required, it provides better call information, which improves the accuracy of the profile data.

When using SEP:

```bash
sep -start -out code.tb7 -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr no_filter:usr -perf-script brstack -app ./main
```

This produces a `code.perf.data.script` output which can be used with `llvm-profgen`’s `--perfscript` input option.

3. Convert the collected profile data to LLVM’s sample profile format. This is currently supported via the AutoFDO converter `create_llvm_prof`. Once built and installed, you can convert the `perf.data` file to LLVM’s sample profile format using the command:

```bash
create_llvm_prof --binary=./main --out=main.prof
```

This will read `perf.data` and the binary file `./main` and emit the profile data in `main.prof`. Note that if you ran `perf` without the `-b` flag, you need to use `--use_lbr=false` when calling `create_llvm_prof`.

Alternatively, the LLVM tool `llvm-profgen` can also be used to generate the LLVM sample profile:

```bash
llvm-profgen --binary=./main --output=main.prof --perfdata=perf.data
```

Please note that `perf.data` must be collected by passing the `-b` flag to Linux Perf for the above step to work.

When using SEP the output is in the textual format corresponding to `llvm-profgen --perfscript`. For example:

```bash
llvm-profgen --binary=./main --output=main.prof --perfscript=code.perf.data.script
```

4. Build the code again using the collected profile. This step feeds the profile back to the optimizers. This should result in a binary that executes faster than the original one. Note that you are not required to build the code with the exact same arguments that you used in the first step. The only requirement is that you build the code with the same debug info options and `-Cprofile-sample-use`.

```bash
rustc -Cprofile-sample-use=main.prof -Cdebuginfo=line-tables-only -Cdebuginfo-for-profiling -O main.rs
```

Note that sample-based PGO in `rustc` is mostly tested on `x86-64` Linux platforms. It should work on other hardware architectures and operating systems, but it is not heavily tested there yet.

## Further Reading

`rustc`'s PGO support relies entirely on LLVM's implementation of the feature
and is equivalent to what Clang offers via the `-fprofile-generate` /
`-fprofile-use` flags. The [Profile Guided Optimization][clang-pgo] section
`-fprofile-use` and `-fprofile-sample-use` flags. The [Profile Guided Optimization][clang-pgo] section
in Clang's documentation is therefore an interesting read for anyone who wants
to use PGO with Rust.

Expand All @@ -151,11 +225,11 @@ to use PGO with Rust.
As an alternative to directly using the compiler for Profile-Guided Optimization,
you may choose to go with `cargo-pgo`, which has an intuitive command-line API
and saves you the trouble of doing all the manual work. You can read more about
it in [cargo-pgo repository][cargo-pgo].
it in [cargo-pgo repository][cargo-pgo]. For now, `cargo-pgo` supports only Instrumentation PGO.

[cargo-pgo]: https://github.com/Kobzol/cargo-pgo

For the sake of completeness, here are the corresponding steps using `cargo-pgo`:
For the sake of completeness, here are the corresponding steps using `cargo-pgo` for Instrumentation PGO:

```bash
# Install if you haven't already
Expand Down

This file was deleted.

10 changes: 0 additions & 10 deletions src/doc/unstable-book/src/compiler-flags/profile_sample_use.md

This file was deleted.

Loading