Skip to content

Commit 5d26634

Browse files
authored
Rollup merge of #152443 - kjetilkjeka:nvptx_drop_support_old_hw_and_isa, r=ZuseZ4
NVPTX: Drop support for old architectures and old ISAs. This is the implementation of [this MCP](rust-lang/compiler-team#965 (comment)). I believe it was said that no FCP was needed, but if that is incorrect, the FCP is scheduled to finish in 2 days anyway, so it can be merged then in any case.
2 parents 37d85e5 + a2d23ce commit 5d26634

15 files changed

Lines changed: 83 additions & 59 deletions

File tree

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -514,6 +514,9 @@ fn print_target_cpus(sess: &Session, tm: &llvm::TargetMachine, out: &mut String)
514514
};
515515
let mut cpus = cpu_names
516516
.lines()
517+
.filter(|cpu_name| {
518+
!sess.target.unsupported_cpus.contains(&std::borrow::Cow::Borrowed(*cpu_name))
519+
})
517520
.map(|cpu_name| Cpu { cpu_name, remark: make_remark(cpu_name) })
518521
.collect::<VecDeque<_>>();
519522

compiler/rustc_codegen_ssa/src/base.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -696,6 +696,13 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
696696
tcx.dcx().emit_fatal(errors::CpuRequired);
697697
}
698698

699+
if let Some(target_cpu) = &tcx.sess.opts.cg.target_cpu
700+
&& tcx.sess.target.unsupported_cpus.contains(&target_cpu.into())
701+
{
702+
// The target cpu is explicitly listed as an unsupported cpu
703+
tcx.dcx().emit_fatal(errors::CpuUnsupported { target_cpu: target_cpu.clone() });
704+
}
705+
699706
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
700707

701708
// Run the monomorphization collector and partition the collected items into

compiler/rustc_codegen_ssa/src/errors.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,12 @@ pub(crate) struct InsufficientVSCodeProduct;
540540
#[diag("target requires explicitly specifying a cpu with `-C target-cpu`")]
541541
pub(crate) struct CpuRequired;
542542

543+
#[derive(Diagnostic)]
544+
#[diag("target cpu `{$target_cpu}` is known but unsupported")]
545+
pub(crate) struct CpuUnsupported {
546+
pub target_cpu: String,
547+
}
548+
543549
#[derive(Diagnostic)]
544550
#[diag("processing debug info with `dsymutil` failed: {$status}")]
545551
#[note("{$output}")]

compiler/rustc_codegen_ssa/src/target_features.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,20 @@ pub fn target_spec_to_backend_features<'a>(
387387
sess: &'a Session,
388388
mut extend_backend_features: impl FnMut(&'a str, /* enable */ bool),
389389
) {
390-
// Compute implied features
391390
let mut rust_features = vec![];
391+
392+
// This check handles SM versions that default (by LLVM) to unsupported (by Rust) PTX ISA versions.
393+
// sm_70, sm_72 and sm_75 default to PTX ISA versions with major version 6, while sm_80 defaults to 7.0
394+
if sess.target.arch == Arch::Nvptx64
395+
&& matches!(
396+
sess.opts.cg.target_cpu.as_deref(),
397+
None | Some("sm_70") | Some("sm_72") | Some("sm_75")
398+
)
399+
{
400+
rust_features.push((true, "ptx70"));
401+
}
402+
403+
// Compute implied features
392404
parse_rust_feature_list(
393405
sess,
394406
&sess.target.features,

compiler/rustc_target/src/spec/json.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ impl Target {
116116
forward!(asm_args);
117117
forward!(cpu);
118118
forward!(need_explicit_cpu);
119+
forward!(unsupported_cpus);
119120
forward!(features);
120121
forward!(dynamic_linking);
121122
forward_opt!(direct_access_external_data);
@@ -320,6 +321,7 @@ impl ToJson for Target {
320321
target_option_val!(asm_args);
321322
target_option_val!(cpu);
322323
target_option_val!(need_explicit_cpu);
324+
target_option_val!(unsupported_cpus);
323325
target_option_val!(features);
324326
target_option_val!(dynamic_linking);
325327
target_option_val!(direct_access_external_data);
@@ -543,6 +545,7 @@ struct TargetSpecJson {
543545
asm_args: Option<StaticCow<[StaticCow<str>]>>,
544546
cpu: Option<StaticCow<str>>,
545547
need_explicit_cpu: Option<bool>,
548+
unsupported_cpus: Option<StaticCow<[StaticCow<str>]>>,
546549
features: Option<StaticCow<str>>,
547550
dynamic_linking: Option<bool>,
548551
direct_access_external_data: Option<bool>,

compiler/rustc_target/src/spec/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2361,6 +2361,10 @@ pub struct TargetOptions {
23612361
/// Whether a cpu needs to be explicitly set.
23622362
/// Set to true if there is no default cpu. Defaults to false.
23632363
pub need_explicit_cpu: bool,
2364+
/// A list of CPUs that are provided by LLVM but are considered unsupported by Rust.
2365+
/// These CPUs are omitted from `--print target-cpus` output and will cause an error
2366+
/// if used with `-Ctarget-cpu`.
2367+
pub unsupported_cpus: StaticCow<[StaticCow<str>]>,
23642368
/// Default (Rust) target features to enable for this target. These features
23652369
/// overwrite `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`.
23662370
/// Corresponds to `llc -mattr=$llvm_features` where `$llvm_features` is the
@@ -2818,6 +2822,7 @@ impl Default for TargetOptions {
28182822
asm_args: cvs![],
28192823
cpu: "generic".into(),
28202824
need_explicit_cpu: false,
2825+
unsupported_cpus: cvs![],
28212826
features: "".into(),
28222827
direct_access_external_data: None,
28232828
dynamic_linking: false,

compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::spec::{
22
Arch, LinkSelfContainedDefault, LinkerFlavor, MergeFunctions, Os, PanicStrategy, Target,
3-
TargetMetadata, TargetOptions,
3+
TargetMetadata, TargetOptions, cvs,
44
};
55

66
pub(crate) fn target() -> Target {
@@ -22,7 +22,13 @@ pub(crate) fn target() -> Target {
2222
linker_flavor: LinkerFlavor::Llbc,
2323

2424
// With `ptx-linker` approach, it can be later overridden via link flags.
25-
cpu: "sm_30".into(),
25+
cpu: "sm_70".into(),
26+
27+
// No longer supported architectures
28+
unsupported_cpus: cvs!(
29+
"sm_20", "sm_21", "sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
30+
"sm_60", "sm_61", "sm_62"
31+
),
2632

2733
// FIXME: create tests for the atomics.
2834
max_atomic_width: Some(64),

compiler/rustc_target/src/target_features.rs

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -536,19 +536,7 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
536536

537537
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
538538
// tidy-alphabetical-start
539-
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
540-
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
541-
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
542-
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
543-
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
544-
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
545-
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
546-
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
547-
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
548-
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
549-
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
550-
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
551-
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
539+
("sm_70", Unstable(sym::nvptx_target_feature), &[]),
552540
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
553541
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
554542
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
@@ -567,19 +555,7 @@ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
567555
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
568556
// tidy-alphabetical-end
569557
// tidy-alphabetical-start
570-
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
571-
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
572-
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
573-
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
574-
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
575-
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
576-
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
577-
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
578-
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
579-
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
580-
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
581-
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
582-
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
558+
("ptx70", Unstable(sym::nvptx_target_feature), &[]),
583559
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
584560
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
585561
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,23 @@ There are two options for using the core library:
2525

2626
### Target and features
2727

28-
It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
29-
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
28+
It is often beneficial to specify the target SM architecture, such as `-C target-cpu=sm_89`, because the default prioritizes broad compatibility rather than performance. Doing so also selects the PTX version as the *maximum* of (a) the oldest PTX version that supports the chosen target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)) and (b) the oldest PTX version supported by the Rust toolchain, which maximizes driver compatibility.
29+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target SM architecture (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
3030
Later PTX versions may allow more efficient code generation.
3131

3232
Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
3333
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.
3434

35+
## Minimum SM and PTX support by Rust version
36+
Support for old hardware architectures and PTX ISA versions is periodically dropped. This table shows the minimum supported versions per Rust version.
37+
38+
| Rust | SM minimum | PTX ISA minimum |
39+
| ------------ | -------------- | --------------- |
40+
| - 1.96 | 2.0 | 3.2 |
41+
| 1.97 - TBD | 7.0 (Volta+) | 7.0 (CUDA 11+) |
42+
43+
For a full overview of which GPUs support code built for a specific SM version, see the [CUDA GPU Compute Capability documentation](https://developer.nvidia.com/cuda/gpus).
44+
3545
## Building Rust kernels
3646

3747
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

tests/assembly-llvm/nvptx-arch-default.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
extern crate breakpoint_panic_handler;
99

1010
// Verify default target arch with ptx-linker.
11-
// CHECK: .target sm_30
11+
// CHECK: .version 7.0
12+
// CHECK: .target sm_70
1213
// CHECK: .address_size 64

0 commit comments

Comments
 (0)