Skip to content

Commit 8ca00c4

Browse files
authored
Rollup merge of rust-lang#149637 - Flakebi:fix-convergent-mir-opts, r=nnethercote
Do not run jump-threading for GPUs GPU targets have convergent operations that must not be duplicated or moved in or out of control-flow. An example convergent operation is a barrier/syncthreads. The only MIR pass affected by this is jump-threading, as it can duplicate calls. Disable jump-threading for GPU targets to prevent generating incorrect code. This affects the amdgpu and nvptx targets. Fixes rust-lang#137086, see this issue for details. Tracking issue: rust-lang#135024 cc @RDambrosio016 @kjetilkjeka for nvptx cc @ZuseZ4
2 parents 0469a92 + 564a682 commit 8ca00c4

7 files changed

Lines changed: 144 additions & 3 deletions

File tree

compiler/rustc_mir_transform/src/jump_threading.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@ const MAX_COST: u8 = 100;
7676

7777
impl<'tcx> crate::MirPass<'tcx> for JumpThreading {
7878
fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
79+
if sess.target.is_like_gpu {
80+
// Jump threading can duplicate calls in control-flow.
81+
// This leads to incorrect code when done for so called "convergent" operations on GPU
82+
// targets, similar to how inline assembly cannot be duplicated on all targets.
83+
// Conservatively prevent this by disabling the pass.
84+
// See also issue #137086.
85+
return false;
86+
}
7987
sess.mir_opt_level() >= 2
8088
}
8189

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
This intrinsic does not behave like a normal function call; it is a "[convergent]" operation and as such has non-standard control-flow effects which need special treatment by the language.
2+
Rust currently does not properly support convergent operations.
3+
This operation is hence provided on a best-effort basis.
4+
Using it may result in incorrect code under some circumstances.
5+
6+
[convergent]: https://llvm.org/docs/ConvergentOperations.html

library/stdarch/crates/core_arch/src/amdgpu/mod.rs

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ pub fn wavefrontsize() -> u32 {
244244
/// Synchronize all wavefronts in a workgroup.
245245
///
246246
/// Each wavefronts in a workgroup waits at the barrier until all wavefronts in the workgroup arrive at a barrier.
247+
///
248+
#[doc = include_str!("intrinsic_is_convergent.md")]
247249
#[inline]
248250
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
249251
pub fn s_barrier() {
@@ -253,6 +255,8 @@ pub fn s_barrier() {
253255
/// Signal a specific barrier type.
254256
///
255257
/// Only for non-named barriers.
258+
///
259+
#[doc = include_str!("intrinsic_is_convergent.md")]
256260
#[inline]
257261
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
258262
pub unsafe fn s_barrier_signal<const BARRIER_TYPE: i32>() {
@@ -265,6 +269,8 @@ pub unsafe fn s_barrier_signal<const BARRIER_TYPE: i32>() {
265269
/// Provides access to the s_barrier_signal_first instruction;
266270
/// additionally ensures that the result value is valid even when
267271
/// the intrinsic is used from a wavefront that is not running in a workgroup.
272+
///
273+
#[doc = include_str!("intrinsic_is_convergent.md")]
268274
#[inline]
269275
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
270276
pub unsafe fn s_barrier_signal_isfirst<const BARRIER_TYPE: i32>() -> bool {
@@ -274,6 +280,8 @@ pub unsafe fn s_barrier_signal_isfirst<const BARRIER_TYPE: i32>() -> bool {
274280
/// Wait for a specific barrier type.
275281
///
276282
/// Only for non-named barriers.
283+
///
284+
#[doc = include_str!("intrinsic_is_convergent.md")]
277285
#[inline]
278286
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
279287
pub unsafe fn s_barrier_wait<const BARRIER_TYPE: i16>() {
@@ -283,6 +291,8 @@ pub unsafe fn s_barrier_wait<const BARRIER_TYPE: i16>() {
283291
/// Get the state of a specific barrier type.
284292
///
285293
/// The `barrier_type` argument must be uniform, otherwise behavior is undefined.
294+
///
295+
#[doc = include_str!("intrinsic_is_convergent.md")]
286296
#[inline]
287297
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
288298
pub unsafe fn s_get_barrier_state<const BARRIER_TYPE: i32>() -> u32 {
@@ -292,6 +302,8 @@ pub unsafe fn s_get_barrier_state<const BARRIER_TYPE: i32>() -> u32 {
292302
/// A barrier for only the threads within the current wavefront.
293303
///
294304
/// Does not result in an instruction but restricts the compiler.
305+
///
306+
#[doc = include_str!("intrinsic_is_convergent.md")]
295307
#[inline]
296308
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
297309
pub fn wave_barrier() {
@@ -315,6 +327,8 @@ pub fn wave_barrier() {
315327
/// - 0x0100: All DS read instructions may be scheduled across `sched_barrier`.
316328
/// - 0x0200: All DS write instructions may be scheduled across `sched_barrier`.
317329
/// - 0x0400: All Transcendental (e.g. V_EXP) instructions may be scheduled across `sched_barrier`.
330+
///
331+
#[doc = include_str!("intrinsic_is_convergent.md")]
318332
#[inline]
319333
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
320334
pub unsafe fn sched_barrier<const MASK: u32>() {
@@ -345,6 +359,8 @@ pub unsafe fn sched_barrier<const MASK: u32>() {
345359
/// // 5 MFMA
346360
/// sched_group_barrier::<8, 5, 0>()
347361
/// ```
362+
///
363+
#[doc = include_str!("intrinsic_is_convergent.md")]
348364
#[inline]
349365
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
350366
pub unsafe fn sched_group_barrier<const MASK: u32, const SIZE: u32, const SYNC_ID: u32>() {
@@ -366,6 +382,8 @@ pub fn s_sleep<const COUNT: u32>() {
366382
/// Stop execution of the kernel.
367383
///
368384
/// This usually signals an error state.
385+
///
386+
#[doc = include_str!("intrinsic_is_convergent.md")]
369387
#[inline]
370388
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
371389
pub fn s_sethalt<const VALUE: u32>() -> ! {
@@ -407,6 +425,8 @@ pub fn mbcnt_hi(value: u32, init: u32) -> u32 {
407425

408426
/// Returns a bitfield (`u32` or `u64`) containing the result of its i1 argument
409427
/// in all active lanes, and zero in all inactive lanes.
428+
///
429+
#[doc = include_str!("intrinsic_is_convergent.md")]
410430
#[inline]
411431
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
412432
pub fn ballot(b: bool) -> u64 {
@@ -419,6 +439,8 @@ pub fn ballot(b: bool) -> u64 {
419439
/// While [`ballot`] converts a `bool` to a mask, `inverse_ballot` converts a mask back to a `bool`.
420440
/// This means `inverse_ballot(ballot(b)) == b`.
421441
/// The inverse of `ballot(inverse_ballot(value)) ~= value` is not always true as inactive lanes are set to zero by `ballot`.
442+
///
443+
#[doc = include_str!("intrinsic_is_convergent.md")]
422444
#[inline]
423445
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
424446
pub fn inverse_ballot(value: u64) -> bool {
@@ -433,6 +455,8 @@ pub fn inverse_ballot(value: u64) -> bool {
433455
/// - 2: DPP
434456
///
435457
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
458+
///
459+
#[doc = include_str!("intrinsic_is_convergent.md")]
436460
#[inline]
437461
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
438462
pub fn wave_reduce_umin<const STRATEGY: u32>(value: u32) -> u32 {
@@ -447,6 +471,8 @@ pub fn wave_reduce_umin<const STRATEGY: u32>(value: u32) -> u32 {
447471
/// - 2: DPP
448472
///
449473
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
474+
///
475+
#[doc = include_str!("intrinsic_is_convergent.md")]
450476
#[inline]
451477
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
452478
pub fn wave_reduce_min<const STRATEGY: u32>(value: i32) -> i32 {
@@ -462,6 +488,8 @@ pub fn wave_reduce_min<const STRATEGY: u32>(value: i32) -> i32 {
462488
/// - 2: DPP
463489
///
464490
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
491+
///
492+
#[doc = include_str!("intrinsic_is_convergent.md")]
465493
#[inline]
466494
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
467495
pub fn wave_reduce_umax<const STRATEGY: u32>(value: u32) -> u32 {
@@ -476,6 +504,8 @@ pub fn wave_reduce_umax<const STRATEGY: u32>(value: u32) -> u32 {
476504
/// - 2: DPP
477505
///
478506
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
507+
///
508+
#[doc = include_str!("intrinsic_is_convergent.md")]
479509
#[inline]
480510
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
481511
pub fn wave_reduce_max<const STRATEGY: u32>(value: i32) -> i32 {
@@ -491,6 +521,8 @@ pub fn wave_reduce_max<const STRATEGY: u32>(value: i32) -> i32 {
491521
/// - 2: DPP
492522
///
493523
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
524+
///
525+
#[doc = include_str!("intrinsic_is_convergent.md")]
494526
#[inline]
495527
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
496528
pub fn wave_reduce_add<const STRATEGY: u32>(value: u32) -> u32 {
@@ -506,6 +538,8 @@ pub fn wave_reduce_add<const STRATEGY: u32>(value: u32) -> u32 {
506538
/// - 2: DPP
507539
///
508540
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
541+
///
542+
#[doc = include_str!("intrinsic_is_convergent.md")]
509543
#[inline]
510544
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
511545
pub fn wave_reduce_and<const STRATEGY: u32>(value: u32) -> u32 {
@@ -520,6 +554,8 @@ pub fn wave_reduce_and<const STRATEGY: u32>(value: u32) -> u32 {
520554
/// - 2: DPP
521555
///
522556
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
557+
///
558+
#[doc = include_str!("intrinsic_is_convergent.md")]
523559
#[inline]
524560
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
525561
pub fn wave_reduce_or<const STRATEGY: u32>(value: u32) -> u32 {
@@ -534,6 +570,8 @@ pub fn wave_reduce_or<const STRATEGY: u32>(value: u32) -> u32 {
534570
/// - 2: DPP
535571
///
536572
/// If target does not support the DPP operations (e.g. gfx6/7), reduction will be performed using default iterative strategy.
573+
///
574+
#[doc = include_str!("intrinsic_is_convergent.md")]
537575
#[inline]
538576
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
539577
pub fn wave_reduce_xor<const STRATEGY: u32>(value: u32) -> u32 {
@@ -544,12 +582,16 @@ pub fn wave_reduce_xor<const STRATEGY: u32>(value: u32) -> u32 {
544582
// The following intrinsics can have multiple sizes
545583

546584
/// Get `value` from the first active lane in the wavefront.
585+
///
586+
#[doc = include_str!("intrinsic_is_convergent.md")]
547587
#[inline]
548588
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
549589
pub fn readfirstlane_u32(value: u32) -> u32 {
550590
llvm_readfirstlane_u32(value)
551591
}
552592
/// Get `value` from the first active lane in the wavefront.
593+
///
594+
#[doc = include_str!("intrinsic_is_convergent.md")]
553595
#[inline]
554596
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
555597
pub fn readfirstlane_u64(value: u64) -> u64 {
@@ -559,6 +601,8 @@ pub fn readfirstlane_u64(value: u64) -> u64 {
559601
///
560602
/// The lane argument must be uniform across the currently active threads
561603
/// of the current wavefront. Otherwise, the result is undefined.
604+
///
605+
#[doc = include_str!("intrinsic_is_convergent.md")]
562606
#[inline]
563607
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
564608
pub unsafe fn readlane_u32(value: u32, lane: u32) -> u32 {
@@ -568,6 +612,8 @@ pub unsafe fn readlane_u32(value: u32, lane: u32) -> u32 {
568612
///
569613
/// The lane argument must be uniform across the currently active threads
570614
/// of the current wavefront. Otherwise, the result is undefined.
615+
///
616+
#[doc = include_str!("intrinsic_is_convergent.md")]
571617
#[inline]
572618
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
573619
pub unsafe fn readlane_u64(value: u64, lane: u32) -> u64 {
@@ -582,6 +628,8 @@ pub unsafe fn readlane_u64(value: u64, lane: u32) -> u64 {
582628
///
583629
/// `value` is the value returned by `lane`.
584630
/// `default` is the value returned by all lanes other than `lane`.
631+
///
632+
#[doc = include_str!("intrinsic_is_convergent.md")]
585633
#[inline]
586634
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
587635
pub unsafe fn writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
@@ -596,6 +644,8 @@ pub unsafe fn writelane_u32(value: u32, lane: u32, default: u32) -> u32 {
596644
///
597645
/// `value` is the value returned by `lane`.
598646
/// `default` is the value returned by all lanes other than `lane`.
647+
///
648+
#[doc = include_str!("intrinsic_is_convergent.md")]
599649
#[inline]
600650
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
601651
pub unsafe fn writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
@@ -605,6 +655,8 @@ pub unsafe fn writelane_u64(value: u64, lane: u32, default: u64) -> u64 {
605655
/// Stop execution of the wavefront.
606656
///
607657
/// This usually signals the end of a successful execution.
658+
///
659+
#[doc = include_str!("intrinsic_is_convergent.md")]
608660
#[inline]
609661
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
610662
pub fn endpgm() -> ! {
@@ -621,6 +673,8 @@ pub fn endpgm() -> ! {
621673
/// v_mov_b32 <dest> <old>
622674
/// v_mov_b32 <dest> <src> <dpp_ctrl> <row_mask> <bank_mask> <bound_ctrl>
623675
/// ```
676+
///
677+
#[doc = include_str!("intrinsic_is_convergent.md")]
624678
#[inline]
625679
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
626680
pub unsafe fn update_dpp<
@@ -651,6 +705,8 @@ pub fn s_memrealtime() -> u64 {
651705
///
652706
/// Reading from inactive lanes returns `0`.
653707
/// In case multiple values get written to the same `lane`, the value from the source lane with the higher index is taken.
708+
///
709+
#[doc = include_str!("intrinsic_is_convergent.md")]
654710
#[inline]
655711
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
656712
pub unsafe fn ds_permute(lane: u32, value: u32) -> u32 {
@@ -661,6 +717,8 @@ pub unsafe fn ds_permute(lane: u32, value: u32) -> u32 {
661717
/// Returns the `value` given to `ds_permute` by lane `lane`.
662718
///
663719
/// Reading from inactive lanes returns `0`.
720+
///
721+
#[doc = include_str!("intrinsic_is_convergent.md")]
664722
#[inline]
665723
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
666724
pub unsafe fn ds_bpermute(lane: u32, value: u32) -> u32 {
@@ -680,6 +738,8 @@ pub unsafe fn perm(src0: u32, src1: u32, selector: u32) -> u32 {
680738
///
681739
/// The third and fourth inputs must be uniform across the current wavefront.
682740
/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
741+
///
742+
#[doc = include_str!("intrinsic_is_convergent.md")]
683743
#[inline]
684744
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
685745
pub unsafe fn permlane16_u32<const FI: bool, const BOUND_CONTROL: bool>(
@@ -696,6 +756,8 @@ pub unsafe fn permlane16_u32<const FI: bool, const BOUND_CONTROL: bool>(
696756
///
697757
/// The third and fourth inputs must be uniform across the current wavefront.
698758
/// These are combined into a single 64-bit value representing lane selects used to swizzle within each row.
759+
///
760+
#[doc = include_str!("intrinsic_is_convergent.md")]
699761
#[inline]
700762
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
701763
pub unsafe fn permlanex16_u32<const FI: bool, const BOUND_CONTROL: bool>(
@@ -718,6 +780,8 @@ pub fn s_get_waveid_in_workgroup() -> u32 {
718780
/// Swap `value` between upper and lower 32 lanes in a wavefront.
719781
///
720782
/// Does nothing for wave32.
783+
///
784+
#[doc = include_str!("intrinsic_is_convergent.md")]
721785
#[inline]
722786
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
723787
pub unsafe fn permlane64_u32(value: u32) -> u32 {
@@ -728,6 +792,8 @@ pub unsafe fn permlane64_u32(value: u32) -> u32 {
728792
/// Performs arbitrary gather-style operation within a row (16 contiguous lanes) of the second input operand.
729793
///
730794
/// In contrast to [`permlane16_u32`], allows each lane to specify its own gather lane.
795+
///
796+
#[doc = include_str!("intrinsic_is_convergent.md")]
731797
#[inline]
732798
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
733799
pub unsafe fn permlane16_var<const FI: bool, const BOUND_CONTROL: bool>(
@@ -742,6 +808,8 @@ pub unsafe fn permlane16_var<const FI: bool, const BOUND_CONTROL: bool>(
742808
/// Performs arbitrary gather-style operation across two rows (16 contiguous lanes) of the second input operand.
743809
///
744810
/// In contrast to [`permlanex16_u32`], allows each lane to specify its own gather lane.
811+
///
812+
#[doc = include_str!("intrinsic_is_convergent.md")]
745813
#[inline]
746814
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
747815
pub unsafe fn permlanex16_var<const FI: bool, const BOUND_CONTROL: bool>(
@@ -766,6 +834,8 @@ pub fn wave_id() -> u32 {
766834
/// Odd rows of the first operand are swapped with even rows of the second operand (one row is 16 lanes).
767835
/// Returns a pair for the swapped registers.
768836
/// The first element of the return corresponds to the swapped element of the first argument.
837+
///
838+
#[doc = include_str!("intrinsic_is_convergent.md")]
769839
#[inline]
770840
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
771841
pub unsafe fn permlane16_swap<const FI: bool, const BOUND_CONTROL: bool>(
@@ -782,6 +852,8 @@ pub unsafe fn permlane16_swap<const FI: bool, const BOUND_CONTROL: bool>(
782852
/// Rows 2 and 3 of the first operand are swapped with rows 0 and 1 of the second operand (one row is 16 lanes).
783853
/// Returns a pair for the swapped registers.
784854
/// The first element of the return corresponds to the swapped element of the first argument.
855+
///
856+
#[doc = include_str!("intrinsic_is_convergent.md")]
785857
#[inline]
786858
#[unstable(feature = "stdarch_amdgpu", issue = "149988")]
787859
pub unsafe fn permlane32_swap<const FI: bool, const BOUND_CONTROL: bool>(

library/stdarch/crates/core_arch/src/nvptx/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ unsafe extern "C" {
4949
}
5050

5151
/// Synchronizes all threads in the block.
52+
///
53+
#[doc = include_str!("../amdgpu/intrinsic_is_convergent.md")]
5254
#[inline]
5355
#[unstable(feature = "stdarch_nvptx", issue = "111199")]
5456
pub unsafe fn _syncthreads() -> () {

src/doc/rustc-dev-guide/src/tests/minicore.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
<!-- date-check: Oct 2025 -->
44

5-
[`tests/auxiliary/minicore.rs`][`minicore`] is a test auxiliary for ui/codegen/assembly test suites.
5+
[`tests/auxiliary/minicore.rs`][`minicore`] is a test auxiliary for ui/codegen/assembly/mir-opt test suites.
66
It provides `core` stubs for tests that need to
77
build for cross-compiled targets but do not need/want to run.
88

src/tools/compiletest/src/directives.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,9 +478,12 @@ impl TestProps {
478478
fn update_add_minicore(&mut self, ln: &DirectiveLine<'_>, config: &Config) {
479479
let add_minicore = config.parse_name_directive(ln, directives::ADD_MINICORE);
480480
if add_minicore {
481-
if !matches!(config.mode, TestMode::Ui | TestMode::Codegen | TestMode::Assembly) {
481+
if !matches!(
482+
config.mode,
483+
TestMode::Ui | TestMode::Codegen | TestMode::Assembly | TestMode::MirOpt
484+
) {
482485
panic!(
483-
"`add-minicore` is currently only supported for ui, codegen and assembly test modes"
486+
"`add-minicore` is currently only supported for ui, codegen, assembly and mir-opt test modes"
484487
);
485488
}
486489

0 commit comments

Comments
 (0)