Skip to content

Commit af90fab

Browse files
committed
NVPTX: Drop support for old hw and old ISAs
1 parent d00ba92 commit af90fab

12 files changed

Lines changed: 73 additions & 32 deletions

File tree

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,9 @@ fn print_target_cpus(sess: &Session, tm: &llvm::TargetMachine, out: &mut String)
523523
};
524524
let mut cpus = cpu_names
525525
.lines()
526+
.filter(|cpu_name| {
527+
!sess.target.unsupported_cpus.contains(&std::borrow::Cow::Borrowed(*cpu_name))
528+
})
526529
.map(|cpu_name| Cpu { cpu_name, remark: make_remark(cpu_name) })
527530
.collect::<VecDeque<_>>();
528531

compiler/rustc_codegen_ssa/src/base.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,13 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
688688
tcx.dcx().emit_fatal(errors::CpuRequired);
689689
}
690690

691+
if let Some(target_cpu) = &tcx.sess.opts.cg.target_cpu
692+
&& tcx.sess.target.unsupported_cpus.contains(&target_cpu.into())
693+
{
694+
// The target cpu is explicitly listed as an unsupported cpu
695+
tcx.dcx().emit_fatal(errors::CpuUnsupported { target_cpu: target_cpu.clone() });
696+
}
697+
691698
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
692699

693700
// Run the monomorphization collector and partition the collected items into

compiler/rustc_codegen_ssa/src/errors.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,12 @@ pub(crate) struct InsufficientVSCodeProduct;
550550
#[diag("target requires explicitly specifying a cpu with `-C target-cpu`")]
551551
pub(crate) struct CpuRequired;
552552

553+
#[derive(Diagnostic)]
554+
#[diag("target cpu `{$target_cpu}` is known but unsupported")]
555+
pub(crate) struct CpuUnsupported {
556+
pub target_cpu: String,
557+
}
558+
553559
#[derive(Diagnostic)]
554560
#[diag("processing debug info with `dsymutil` failed: {$status}")]
555561
#[note("{$output}")]

compiler/rustc_codegen_ssa/src/target_features.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,16 @@ pub fn target_spec_to_backend_features<'a>(
393393
},
394394
);
395395

396+
// This is needed to ensure that we don't use PTX ISA versions that we do not support
397+
if sess.target.arch == Arch::Nvptx64
398+
&& matches!(
399+
sess.opts.cg.target_cpu.as_deref(),
400+
None | Some("sm_70") | Some("sm_72") | Some("sm_75")
401+
)
402+
{
403+
rust_features.push((true, "ptx70"));
404+
}
405+
396406
// Add this to the backend features.
397407
for (enable, feature) in rust_features {
398408
extend_backend_features(feature, enable);

compiler/rustc_target/src/spec/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,6 +2277,10 @@ pub struct TargetOptions {
22772277
/// Whether a cpu needs to be explicitly set.
22782278
/// Set to true if there is no default cpu. Defaults to false.
22792279
pub need_explicit_cpu: bool,
2280+
/// A list of CPUs that are provided by LLVM but are considered unsupported by Rust.
2281+
/// This is currently used to deprecate old GPUs for the nvptx64 target, for which it is
2282+
/// difficult or impossible to support necessary Rust features.
2283+
pub unsupported_cpus: StaticCow<[StaticCow<str>]>,
22802284
/// Default (Rust) target features to enable for this target. These features
22812285
/// overwrite `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`.
22822286
/// Corresponds to `llc -mattr=$llvm_features` where `$llvm_features` is the
@@ -2735,6 +2739,7 @@ impl Default for TargetOptions {
27352739
asm_args: cvs![],
27362740
cpu: "generic".into(),
27372741
need_explicit_cpu: false,
2742+
unsupported_cpus: cvs![],
27382743
features: "".into(),
27392744
direct_access_external_data: None,
27402745
dynamic_linking: false,

compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::spec::{
22
Arch, LinkSelfContainedDefault, LinkerFlavor, MergeFunctions, Os, PanicStrategy, Target,
3-
TargetMetadata, TargetOptions,
3+
TargetMetadata, TargetOptions, cvs,
44
};
55

66
pub(crate) fn target() -> Target {
@@ -22,7 +22,13 @@ pub(crate) fn target() -> Target {
2222
linker_flavor: LinkerFlavor::Llbc,
2323

2424
// With `ptx-linker` approach, it can be later overridden via link flags.
25-
cpu: "sm_30".into(),
25+
cpu: "sm_70".into(),
26+
27+
// Hardware for which support has been dropped through MCPs
28+
unsupported_cpus: cvs!(
29+
"sm_20", "sm_21", "sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
30+
"sm_60", "sm_61", "sm_62"
31+
),
2632

2733
// FIXME: create tests for the atomics.
2834
max_atomic_width: Some(64),

compiler/rustc_target/src/target_features.rs

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -535,19 +535,7 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
535535

536536
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
537537
// tidy-alphabetical-start
538-
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
539-
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
540-
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
541-
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
542-
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
543-
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
544-
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
545-
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
546-
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
547-
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
548-
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
549-
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
550-
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
538+
("sm_70", Unstable(sym::nvptx_target_feature), &[]),
551539
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
552540
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
553541
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
@@ -566,19 +554,7 @@ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
566554
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
567555
// tidy-alphabetical-end
568556
// tidy-alphabetical-start
569-
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
570-
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
571-
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
572-
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
573-
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
574-
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
575-
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
576-
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
577-
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
578-
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
579-
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
580-
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
581-
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
557+
("ptx70", Unstable(sym::nvptx_target_feature), &[]),
582558
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
583559
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
584560
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,21 @@ There are two options for using the core library:
2525

2626
### Target and features
2727

28-
It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
29-
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
28+
It is often beneficial to specify the target SM architecture, such as `-C target-cpu=sm_89`, because the default prioritizes broad compatibility rather than performance. Doing so also selects the PTX version as the *maximum* of (a) the oldest PTX version that supports the chosen target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)) and (b) the oldest PTX version supported by the Rust toolchain, which maximizes driver compatibility.
29+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target SM architecture (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
3030
Later PTX versions may allow more efficient code generation.
3131

3232
Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
3333
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.
3434

35+
## Minimum SM and PTX support by Rust version
36+
Support for old hardware architectures and PTX ISA versions is periodically dropped. This table shows the minimum supported versions per Rust version.
37+
38+
| Rust | SM minimum | PTX ISA minimum | Reference |
39+
| ------------ | -------------- | ------------- | --------- |
40+
| - 1.94 | 2.0 | 3.2 | |
41+
| 1.95 - TBD | 7.0 | 7.0 (CUDA 11+) | [MCP](https://github.com/rust-lang/compiler-team/issues/965) |
42+
3543
## Building Rust kernels
3644

3745
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

tests/assembly-llvm/nvptx-arch-default.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
extern crate breakpoint_panic_handler;
99

1010
// Verify default target arch with ptx-linker.
11-
// CHECK: .target sm_30
11+
// CHECK: .version 7.0
12+
// CHECK: .target sm_70
1213
// CHECK: .address_size 64

tests/assembly-llvm/nvptx-arch-emit-asm.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@
55
#![no_std]
66

77
// Verify default arch without ptx-linker involved.
8-
// CHECK: .target sm_30
8+
// CHECK: .version 7.0
9+
// CHECK: .target sm_70
910
// CHECK: .address_size 64

0 commit comments

Comments
 (0)