Skip to content

Commit e04ea32

Browse files
committed
NVPTX: Drop support for old hw and old ISAs
1 parent b90dc1e commit e04ea32

14 files changed

Lines changed: 78 additions & 56 deletions

File tree

compiler/rustc_codegen_llvm/src/llvm_util.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,9 @@ fn print_target_cpus(sess: &Session, tm: &llvm::TargetMachine, out: &mut String)
523523
};
524524
let mut cpus = cpu_names
525525
.lines()
526+
.filter(|cpu_name| {
527+
!sess.target.unsupported_cpus.contains(&std::borrow::Cow::Borrowed(*cpu_name))
528+
})
526529
.map(|cpu_name| Cpu { cpu_name, remark: make_remark(cpu_name) })
527530
.collect::<VecDeque<_>>();
528531

compiler/rustc_codegen_ssa/src/base.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -688,6 +688,13 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
688688
tcx.dcx().emit_fatal(errors::CpuRequired);
689689
}
690690

691+
if let Some(target_cpu) = &tcx.sess.opts.cg.target_cpu
692+
&& tcx.sess.target.unsupported_cpus.contains(&target_cpu.into())
693+
{
694+
// The target cpu is explicitly listed as an unsupported cpu
695+
tcx.dcx().emit_fatal(errors::CpuUnsupported { target_cpu: target_cpu.clone() });
696+
}
697+
691698
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
692699

693700
// Run the monomorphization collector and partition the collected items into

compiler/rustc_codegen_ssa/src/errors.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,12 @@ pub(crate) struct InsufficientVSCodeProduct;
545545
#[diag("target requires explicitly specifying a cpu with `-C target-cpu`")]
546546
pub(crate) struct CpuRequired;
547547

548+
#[derive(Diagnostic)]
549+
#[diag("target cpu `{$target_cpu}` is known but unsupported")]
550+
pub(crate) struct CpuUnsupported {
551+
pub target_cpu: String,
552+
}
553+
548554
#[derive(Diagnostic)]
549555
#[diag("processing debug info with `dsymutil` failed: {$status}")]
550556
#[note("{$output}")]

compiler/rustc_codegen_ssa/src/target_features.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,16 @@ pub fn target_spec_to_backend_features<'a>(
393393
},
394394
);
395395

396+
// This is needed to ensure that we don't use PTX ISA versions that we do not support
397+
if sess.target.arch == Arch::Nvptx64
398+
&& matches!(
399+
sess.opts.cg.target_cpu.as_deref(),
400+
None | Some("sm_70") | Some("sm_72") | Some("sm_75")
401+
)
402+
{
403+
rust_features.push((true, "ptx70"));
404+
}
405+
396406
// Add this to the backend features.
397407
for (enable, feature) in rust_features {
398408
extend_backend_features(feature, enable);

compiler/rustc_target/src/spec/json.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ impl Target {
118118
forward!(asm_args);
119119
forward!(cpu);
120120
forward!(need_explicit_cpu);
121+
forward!(unsupported_cpus);
121122
forward!(features);
122123
forward!(dynamic_linking);
123124
forward_opt!(direct_access_external_data);
@@ -322,6 +323,7 @@ impl ToJson for Target {
322323
target_option_val!(asm_args);
323324
target_option_val!(cpu);
324325
target_option_val!(need_explicit_cpu);
326+
target_option_val!(unsupported_cpus);
325327
target_option_val!(features);
326328
target_option_val!(dynamic_linking);
327329
target_option_val!(direct_access_external_data);
@@ -545,6 +547,7 @@ struct TargetSpecJson {
545547
asm_args: Option<StaticCow<[StaticCow<str>]>>,
546548
cpu: Option<StaticCow<str>>,
547549
need_explicit_cpu: Option<bool>,
550+
unsupported_cpus: Option<StaticCow<[StaticCow<str>]>>,
548551
features: Option<StaticCow<str>>,
549552
dynamic_linking: Option<bool>,
550553
direct_access_external_data: Option<bool>,

compiler/rustc_target/src/spec/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2279,6 +2279,10 @@ pub struct TargetOptions {
22792279
/// Whether a cpu needs to be explicitly set.
22802280
/// Set to true if there is no default cpu. Defaults to false.
22812281
pub need_explicit_cpu: bool,
2282+
/// A list of CPUs that are provided by LLVM but are considered unsupported by Rust.
2283+
/// These CPUs are omitted from `--print target-cpus` output and will cause an error
2284+
/// if used with `-Ctarget-cpu`.
2285+
pub unsupported_cpus: StaticCow<[StaticCow<str>]>,
22822286
/// Default (Rust) target features to enable for this target. These features
22832287
/// overwrite `-Ctarget-cpu` but can be overwritten with `-Ctarget-features`.
22842288
/// Corresponds to `llc -mattr=$llvm_features` where `$llvm_features` is the
@@ -2737,6 +2741,7 @@ impl Default for TargetOptions {
27372741
asm_args: cvs![],
27382742
cpu: "generic".into(),
27392743
need_explicit_cpu: false,
2744+
unsupported_cpus: cvs![],
27402745
features: "".into(),
27412746
direct_access_external_data: None,
27422747
dynamic_linking: false,

compiler/rustc_target/src/spec/targets/nvptx64_nvidia_cuda.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use crate::spec::{
22
Arch, LinkSelfContainedDefault, LinkerFlavor, MergeFunctions, Os, PanicStrategy, Target,
3-
TargetMetadata, TargetOptions,
3+
TargetMetadata, TargetOptions, cvs,
44
};
55

66
pub(crate) fn target() -> Target {
@@ -22,7 +22,13 @@ pub(crate) fn target() -> Target {
2222
linker_flavor: LinkerFlavor::Llbc,
2323

2424
// With `ptx-linker` approach, it can be later overridden via link flags.
25-
cpu: "sm_30".into(),
25+
cpu: "sm_70".into(),
26+
27+
// No longer supported architectures
28+
unsupported_cpus: cvs!(
29+
"sm_20", "sm_21", "sm_30", "sm_32", "sm_35", "sm_37", "sm_50", "sm_52", "sm_53",
30+
"sm_60", "sm_61", "sm_62"
31+
),
2632

2733
// FIXME: create tests for the atomics.
2834
max_atomic_width: Some(64),

compiler/rustc_target/src/target_features.rs

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -519,19 +519,7 @@ const MIPS_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
519519

520520
const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
521521
// tidy-alphabetical-start
522-
("sm_20", Unstable(sym::nvptx_target_feature), &[]),
523-
("sm_21", Unstable(sym::nvptx_target_feature), &["sm_20"]),
524-
("sm_30", Unstable(sym::nvptx_target_feature), &["sm_21"]),
525-
("sm_32", Unstable(sym::nvptx_target_feature), &["sm_30"]),
526-
("sm_35", Unstable(sym::nvptx_target_feature), &["sm_32"]),
527-
("sm_37", Unstable(sym::nvptx_target_feature), &["sm_35"]),
528-
("sm_50", Unstable(sym::nvptx_target_feature), &["sm_37"]),
529-
("sm_52", Unstable(sym::nvptx_target_feature), &["sm_50"]),
530-
("sm_53", Unstable(sym::nvptx_target_feature), &["sm_52"]),
531-
("sm_60", Unstable(sym::nvptx_target_feature), &["sm_53"]),
532-
("sm_61", Unstable(sym::nvptx_target_feature), &["sm_60"]),
533-
("sm_62", Unstable(sym::nvptx_target_feature), &["sm_61"]),
534-
("sm_70", Unstable(sym::nvptx_target_feature), &["sm_62"]),
522+
("sm_70", Unstable(sym::nvptx_target_feature), &[]),
535523
("sm_72", Unstable(sym::nvptx_target_feature), &["sm_70"]),
536524
("sm_75", Unstable(sym::nvptx_target_feature), &["sm_72"]),
537525
("sm_80", Unstable(sym::nvptx_target_feature), &["sm_75"]),
@@ -550,19 +538,7 @@ const NVPTX_FEATURES: &[(&str, Stability, ImpliedFeatures)] = &[
550538
("sm_120a", Unstable(sym::nvptx_target_feature), &["sm_120"]),
551539
// tidy-alphabetical-end
552540
// tidy-alphabetical-start
553-
("ptx32", Unstable(sym::nvptx_target_feature), &[]),
554-
("ptx40", Unstable(sym::nvptx_target_feature), &["ptx32"]),
555-
("ptx41", Unstable(sym::nvptx_target_feature), &["ptx40"]),
556-
("ptx42", Unstable(sym::nvptx_target_feature), &["ptx41"]),
557-
("ptx43", Unstable(sym::nvptx_target_feature), &["ptx42"]),
558-
("ptx50", Unstable(sym::nvptx_target_feature), &["ptx43"]),
559-
("ptx60", Unstable(sym::nvptx_target_feature), &["ptx50"]),
560-
("ptx61", Unstable(sym::nvptx_target_feature), &["ptx60"]),
561-
("ptx62", Unstable(sym::nvptx_target_feature), &["ptx61"]),
562-
("ptx63", Unstable(sym::nvptx_target_feature), &["ptx62"]),
563-
("ptx64", Unstable(sym::nvptx_target_feature), &["ptx63"]),
564-
("ptx65", Unstable(sym::nvptx_target_feature), &["ptx64"]),
565-
("ptx70", Unstable(sym::nvptx_target_feature), &["ptx65"]),
541+
("ptx70", Unstable(sym::nvptx_target_feature), &[]),
566542
("ptx71", Unstable(sym::nvptx_target_feature), &["ptx70"]),
567543
("ptx72", Unstable(sym::nvptx_target_feature), &["ptx71"]),
568544
("ptx73", Unstable(sym::nvptx_target_feature), &["ptx72"]),

src/doc/rustc/src/platform-support/nvptx64-nvidia-cuda.md

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,23 @@ There are two options for using the core library:
2525

2626
### Target and features
2727

28-
It is generally necessary to specify the target, such as `-C target-cpu=sm_89`, because the default is very old. This implies two target features: `sm_89` and `ptx78` (and all preceding features within `sm_*` and `ptx*`). Rust will default to using the oldest PTX version that supports the target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)), which maximizes driver compatibility.
29-
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
28+
It is often beneficial to specify the target SM architecture, such as `-C target-cpu=sm_89`, because the default prioritizes broad compatibility rather than performance. Doing so also selects the PTX version: Rust uses the *maximum* of (a) the oldest PTX version that supports the chosen target processor (see [this table](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#release-notes-ptx-release-history)) and (b) the oldest PTX version supported by the Rust toolchain. Choosing the oldest usable PTX version maximizes driver compatibility.
29+
One can use `-C target-feature=+ptx80` to choose a later PTX version without changing the target SM architecture (the default in this case, `ptx78`, requires CUDA driver version 11.8, while `ptx80` would require driver version 12.0).
3030
Later PTX versions may allow more efficient code generation.
3131

3232
Although Rust follows LLVM in representing `ptx*` and `sm_*` as target features, they should be thought of as having crate granularity, set via `-Ctarget-cpu` and optionally `-Ctarget-feature`.
3333
While the compiler accepts `#[target_feature(enable = "ptx80", enable = "sm_89")]`, it is not supported, may not behave as intended, and may become erroneous in the future.
3434

35+
## Minimum SM and PTX support by Rust version
36+
Support for old hardware architectures and PTX ISA versions is periodically dropped. This table shows the minimum supported versions for each Rust version.
37+
38+
| Rust | SM minimum | PTX ISA minimum |
39+
| ------------ | -------------- | --------------- |
40+
| - 1.95 | 2.0 | 3.2 |
41+
| 1.96 - TBD | 7.0 (Volta+) | 7.0 (CUDA 11+) |
42+
43+
For a full overview of which GPUs can run code built for a specific SM version, see the [CUDA GPU Compute Capability documentation](https://developer.nvidia.com/cuda/gpus).
44+
3545
## Building Rust kernels
3646

3747
A `no_std` crate containing one or more functions with `extern "ptx-kernel"` can be compiled to PTX using a command like the following.

tests/assembly-llvm/nvptx-arch-default.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,5 +8,6 @@
88
extern crate breakpoint_panic_handler;
99

1010
// Verify default target arch with ptx-linker.
11-
// CHECK: .target sm_30
11+
// CHECK: .version 7.0
12+
// CHECK: .target sm_70
1213
// CHECK: .address_size 64

0 commit comments

Comments
 (0)