Skip to content

Commit 3f68c63

Browse files
committed
Auto merge of rust-lang#122582 - scottmcm:swap-intrinsic-v2, r=<try>
Let codegen decide when to `mem::swap` with immediates Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea. Thus this PR introduces a new `typed_swap` intrinsic with a fallback body, and replaces that fallback implementation when swapping immediates or scalar pairs. r? oli-obk Replaces rust-lang#111744, and means we'll never need more libs PRs like rust-lang#111803 or rust-lang#107140
2 parents 22e241e + e583c57 commit 3f68c63

File tree

15 files changed

+298
-29
lines changed

15 files changed

+298
-29
lines changed

compiler/rustc_codegen_ssa/src/mir/intrinsic.rs

+16
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,22 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
7676
let name = bx.tcx().item_name(def_id);
7777
let name_str = name.as_str();
7878

79+
// If we're swapping something that's *not* an `OperandValue::Ref`,
80+
// then we can do it directly and avoid the alloca.
81+
// Otherwise, we'll let the fallback MIR body take care of it.
82+
if let sym::typed_swap = name {
83+
let pointee_ty = fn_args.type_at(0);
84+
let pointee_layout = bx.layout_of(pointee_ty);
85+
if !bx.is_backend_ref(pointee_layout) {
86+
let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
87+
let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
88+
let temp = bx.load_operand(x_place);
89+
bx.typed_place_copy(x_place, y_place);
90+
temp.val.store(bx, y_place);
91+
return Ok(());
92+
}
93+
}
94+
7995
let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
8096
let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
8197

compiler/rustc_codegen_ssa/src/traits/builder.rs

+21
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use super::abi::AbiBuilderMethods;
22
use super::asm::AsmBuilderMethods;
3+
use super::consts::ConstMethods;
34
use super::coverageinfo::CoverageInfoBuilderMethods;
45
use super::debuginfo::DebugInfoBuilderMethods;
56
use super::intrinsic::IntrinsicCallMethods;
@@ -267,6 +268,26 @@ pub trait BuilderMethods<'a, 'tcx>:
267268
flags: MemFlags,
268269
);
269270

271+
/// *Typed* copy for non-overlapping places.
272+
///
273+
/// Has a default implementation in terms of `memcpy`, but specific backends
274+
/// can override to do something smarter if possible.
275+
///
276+
/// (For example, typed load-stores with alias metadata.)
277+
fn typed_place_copy(
278+
&mut self,
279+
dst: PlaceRef<'tcx, Self::Value>,
280+
src: PlaceRef<'tcx, Self::Value>,
281+
) {
282+
debug_assert!(src.llextra.is_none());
283+
debug_assert!(dst.llextra.is_none());
284+
debug_assert_eq!(dst.layout.size, src.layout.size);
285+
if !dst.layout.is_zst() {
286+
let bytes = self.const_usize(dst.layout.size.bytes());
287+
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
288+
}
289+
}
290+
270291
fn select(
271292
&mut self,
272293
cond: Self::Value,

compiler/rustc_codegen_ssa/src/traits/type_.rs

+14
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
120120
immediate: bool,
121121
) -> Self::Type;
122122

123+
/// A type that produces an [`OperandValue::Ref`] when loaded.
124+
///
125+
/// AKA one that's not a ZST, not `is_backend_immediate`, and
126+
/// not `is_backend_scalar_pair`. For such a type, a
127+
/// [`load_operand`] doesn't actually `load` anything.
128+
///
129+
/// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
130+
/// [`load_operand`]: super::BuilderMethods::load_operand
131+
fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
132+
!(layout.is_zst()
133+
|| self.is_backend_immediate(layout)
134+
|| self.is_backend_scalar_pair(layout))
135+
}
136+
123137
/// A type that can be used in a [`super::BuilderMethods::load`] +
124138
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
125139
/// such as a MIR `*_0 = *_1`.

compiler/rustc_const_eval/src/interpret/intrinsics.rs

+23-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ use rustc_span::symbol::{sym, Symbol};
2121
use rustc_target::abi::Size;
2222

2323
use super::{
24-
util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, MPlaceTy, Machine, OpTy,
25-
Pointer,
24+
memory::MemoryKind, util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx,
25+
MPlaceTy, Machine, OpTy, Pointer,
2626
};
2727

2828
use crate::fluent_generated as fluent;
@@ -415,6 +415,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
415415
let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
416416
self.write_scalar(result, dest)?;
417417
}
418+
sym::typed_swap => {
419+
self.typed_swap_intrinsic(&args[0], &args[1])?;
420+
}
418421

419422
sym::vtable_size => {
420423
let ptr = self.read_pointer(&args[0])?;
@@ -608,6 +611,24 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
608611
self.mem_copy(src, dst, size, nonoverlapping)
609612
}
610613

614+
/// Does a *typed* swap of `*left` and `*right`.
615+
pub(crate) fn typed_swap_intrinsic(
616+
&mut self,
617+
left: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
618+
right: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
619+
) -> InterpResult<'tcx> {
620+
let left = self.deref_pointer(left)?;
621+
let right = self.deref_pointer(right)?;
622+
debug_assert_eq!(left.layout, right.layout);
623+
let kind = MemoryKind::Stack;
624+
let temp = self.allocate(left.layout, kind)?;
625+
self.copy_op(&left, &temp)?;
626+
self.copy_op(&right, &left)?;
627+
self.copy_op(&temp, &right)?;
628+
self.deallocate_ptr(temp.ptr(), None, kind)?;
629+
Ok(())
630+
}
631+
611632
pub(crate) fn write_bytes_intrinsic(
612633
&mut self,
613634
dst: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,

compiler/rustc_hir_analysis/src/check/intrinsic.rs

+2
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,8 @@ pub fn check_intrinsic_type(
500500
(1, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(0)], Ty::new_unit(tcx))
501501
}
502502

503+
sym::typed_swap => (1, 1, vec![Ty::new_mut_ptr(tcx, param(0)); 2], Ty::new_unit(tcx)),
504+
503505
sym::discriminant_value => {
504506
let assoc_items = tcx.associated_item_def_ids(
505507
tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),

compiler/rustc_span/src/symbol.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,7 @@ symbols! {
18351835
type_macros,
18361836
type_name,
18371837
type_privacy_lints,
1838+
typed_swap,
18381839
u128,
18391840
u128_legacy_const_max,
18401841
u128_legacy_const_min,

library/core/src/mem/mod.rs

+11-23
Original file line numberDiff line numberDiff line change
@@ -726,31 +726,19 @@ pub unsafe fn uninitialized<T>() -> T {
726726
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
727727
#[rustc_diagnostic_item = "mem_swap"]
728728
pub const fn swap<T>(x: &mut T, y: &mut T) {
729-
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
730-
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
731-
// block optimization in `swap_slice` is hard to rewrite back
732-
// into the (unoptimized) direct swapping implementation, so we disable it.
733-
#[cfg(not(any(target_arch = "spirv")))]
734-
{
735-
// For types that are larger multiples of their alignment, the simple way
736-
// tends to copy the whole thing to stack rather than doing it one part
737-
// at a time, so instead treat them as one-element slices and piggy-back
738-
// the slice optimizations that will split up the swaps.
739-
if const { size_of::<T>() / align_of::<T>() > 2 } {
740-
// SAFETY: exclusive references always point to one non-overlapping
741-
// element and are non-null and properly aligned.
742-
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
729+
cfg_if! {
730+
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
731+
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
732+
// block optimization in `ptr::swap_nonoverlapping` is hard to rewrite back
733+
// into the (unoptimized) direct swapping implementation, so we disable it.
734+
if #[cfg(any(target_arch = "spirv"))] {
735+
swap_simple(x, y)
736+
} else {
737+
// SAFETY: `&mut` guarantees these are typed readable and writable
738+
// as well as non-overlapping.
739+
unsafe { ptr::typed_swap(x, y) }
743740
}
744741
}
745-
746-
// If a scalar consists of just a small number of alignment units, let
747-
// the codegen just swap those pieces directly, as it's likely just a
748-
// few instructions and anything else is probably overcomplicated.
749-
//
750-
// Most importantly, this covers primitives and simd types that tend to
751-
// have size=align where doing anything else can be a pessimization.
752-
// (This will also be used for ZSTs, though any solution works for them.)
753-
swap_simple(x, y);
754742
}
755743

756744
/// Same as [`swap`] semantically, but always uses the simple implementation.

library/core/src/ptr/mod.rs

+17
Original file line numberDiff line numberDiff line change
@@ -954,6 +954,23 @@ pub const unsafe fn swap<T>(x: *mut T, y: *mut T) {
954954
}
955955
}
956956

957+
/// Non-overlapping *typed* swap of a single value.
958+
///
959+
/// The codegen backends will replace this with a better implementation when
960+
/// `T` is a simple type that can be an immediate.
961+
///
962+
/// # Safety
963+
///
964+
/// `x` and `y` are readable and writable as `T`, and non-overlapping.
965+
#[rustc_nounwind]
966+
#[inline]
967+
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
968+
pub(crate) const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
969+
// SAFETY: The caller provided single non-overlapping items behind
970+
// pointers, so swapping them with `count: 1` is fine.
971+
unsafe { swap_nonoverlapping(x, y, 1) };
972+
}
973+
957974
/// Swaps `count * size_of::<T>()` bytes between the two regions of memory
958975
/// beginning at `x` and `y`. The two regions must *not* overlap.
959976
///
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#![feature(rustc_attrs)]
2+
3+
use std::ptr::addr_of_mut;
4+
5+
#[rustc_intrinsic]
6+
const unsafe fn typed_swap<T, const WHATEVER: usize>(_: *mut T, _: *mut T) {
7+
unreachable!()
8+
}
9+
10+
fn invalid_array() {
11+
let mut a = [1_u8; 100];
12+
let mut b = [2_u8; 100];
13+
unsafe {
14+
let a = &mut *addr_of_mut!(a).cast::<[bool; 100]>();
15+
let b = &mut *addr_of_mut!(b).cast::<[bool; 100]>();
16+
typed_swap::<_, 0>(a, b); //~ERROR: constructing invalid value
17+
}
18+
}
19+
20+
fn main() {
21+
invalid_array();
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: Undefined Behavior: constructing invalid value at [0]: encountered 0x02, but expected a boolean
2+
--> $DIR/typed-swap-invalid-array.rs:LL:CC
3+
|
4+
LL | typed_swap::<_, 0>(a, b);
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^ constructing invalid value at [0]: encountered 0x02, but expected a boolean
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `invalid_array` at $DIR/typed-swap-invalid-array.rs:LL:CC
11+
note: inside `main`
12+
--> $DIR/typed-swap-invalid-array.rs:LL:CC
13+
|
14+
LL | invalid_array();
15+
| ^^^^^^^^^^^^^^^
16+
17+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
18+
19+
error: aborting due to 1 previous error
20+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#![feature(rustc_attrs)]
2+
3+
use std::ptr::addr_of_mut;
4+
5+
#[rustc_intrinsic]
6+
const unsafe fn typed_swap<T, const WHATEVER: usize>(_: *mut T, _: *mut T) {
7+
unreachable!()
8+
}
9+
10+
fn invalid_scalar() {
11+
let mut a = 1_u8;
12+
let mut b = 2_u8;
13+
unsafe {
14+
let a = &mut *addr_of_mut!(a).cast::<bool>();
15+
let b = &mut *addr_of_mut!(b).cast::<bool>();
16+
typed_swap::<_, 0>(a, b); //~ERROR: constructing invalid value
17+
}
18+
}
19+
20+
fn main() {
21+
invalid_scalar();
22+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: Undefined Behavior: constructing invalid value: encountered 0x02, but expected a boolean
2+
--> $DIR/typed-swap-invalid-scalar.rs:LL:CC
3+
|
4+
LL | typed_swap::<_, 0>(a, b);
5+
| ^^^^^^^^^^^^^^^^^^^^^^^^ constructing invalid value: encountered 0x02, but expected a boolean
6+
|
7+
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
8+
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
9+
= note: BACKTRACE:
10+
= note: inside `invalid_scalar` at $DIR/typed-swap-invalid-scalar.rs:LL:CC
11+
note: inside `main`
12+
--> $DIR/typed-swap-invalid-scalar.rs:LL:CC
13+
|
14+
LL | invalid_scalar();
15+
| ^^^^^^^^^^^^^^^^
16+
17+
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
18+
19+
error: aborting due to 1 previous error
20+

tests/assembly/x86_64-typed-swap.rs

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
//@ only-x86_64
2+
//@ assembly-output: emit-asm
3+
//@ compile-flags: --crate-type=lib -O
4+
5+
use std::arch::x86_64::__m128;
6+
use std::mem::swap;
7+
8+
// CHECK-LABEL: swap_i32:
9+
#[no_mangle]
10+
pub fn swap_i32(x: &mut i32, y: &mut i32) {
11+
// CHECK: movl (%[[ARG1:.+]]), %[[T1:.+]]
12+
// CHECK: movl (%[[ARG2:.+]]), %[[T2:.+]]
13+
// CHECK: movl %[[T2]], (%[[ARG1]])
14+
// CHECK: movl %[[T1]], (%[[ARG2]])
15+
// CHECK: retq
16+
swap(x, y)
17+
}
18+
19+
// CHECK-LABEL: swap_pair:
20+
#[no_mangle]
21+
pub fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
22+
// CHECK: movq (%[[ARG1]]), %[[T1:.+]]
23+
// CHECK: movq (%[[ARG2]]), %[[T2:.+]]
24+
// CHECK: movq %[[T2]], (%[[ARG1]])
25+
// CHECK: movq %[[T1]], (%[[ARG2]])
26+
// CHECK: retq
27+
swap(x, y)
28+
}
29+
30+
// CHECK-LABEL: swap_str:
31+
#[no_mangle]
32+
pub fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
33+
// CHECK: movups (%[[ARG1]]), %[[T1:xmm.]]
34+
// CHECK: movups (%[[ARG2]]), %[[T2:xmm.]]
35+
// CHECK: movups %[[T2]], (%[[ARG1]])
36+
// CHECK: movups %[[T1]], (%[[ARG2]])
37+
// CHECK: retq
38+
swap(x, y)
39+
}
40+
41+
// CHECK-LABEL: swap_simd:
42+
#[no_mangle]
43+
pub fn swap_simd(x: &mut __m128, y: &mut __m128) {
44+
// CHECK: movaps (%[[ARG1]]), %[[T1:xmm.]]
45+
// CHECK: movaps (%[[ARG2]]), %[[T2:xmm.]]
46+
// CHECK: movaps %[[T2]], (%[[ARG1]])
47+
// CHECK: movaps %[[T1]], (%[[ARG2]])
48+
// CHECK: retq
49+
swap(x, y)
50+
}

0 commit comments

Comments
 (0)