Skip to content

Commit e871acb

Browse files
AngelicosPhosphoros authored and scottmcm committed
Tweak the threshold for chunked swapping
Thanks to 98892 for the tests I brought in here, as it demonstrated that 3×usize is currently suboptimal.
1 parent 128148d commit e871acb

File tree

2 files changed

+40
-10
lines changed

2 files changed

+40
-10
lines changed

library/core/src/mem/mod.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -736,7 +736,7 @@ pub const fn swap<T>(x: &mut T, y: &mut T) {
736736
// tends to copy the whole thing to stack rather than doing it one part
737737
// at a time, so instead treat them as one-element slices and piggy-back
738738
// the slice optimizations that will split up the swaps.
739-
if size_of::<T>() / align_of::<T>() > 4 {
739+
if const { size_of::<T>() / align_of::<T>() > 2 } {
740740
// SAFETY: exclusive references always point to one non-overlapping
741741
// element and are non-null and properly aligned.
742742
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };

tests/codegen/swap-small-types.rs

+39 -9
Original file line number | Diff line number | Diff line change
@@ -48,16 +48,32 @@ pub fn swap_rgba64(x: &mut RGBA64, y: &mut RGBA64) {
4848
swap(x, y)
4949
}
5050

51+
// CHECK-LABEL: @swap_vecs
52+
#[no_mangle]
53+
pub fn swap_vecs(x: &mut Vec<u32>, y: &mut Vec<u32>) {
54+
// CHECK-NOT: alloca
55+
// CHECK: ret void
56+
swap(x, y)
57+
}
58+
59+
// CHECK-LABEL: @swap_slices
60+
#[no_mangle]
61+
pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
62+
// CHECK-NOT: alloca
63+
// CHECK: ret void
64+
swap(x, y)
65+
}
66+
5167
// LLVM doesn't vectorize a loop over 3-byte elements,
5268
// so we chunk it down to bytes and loop over those instead.
5369
type RGB24 = [u8; 3];
5470

5571
// CHECK-LABEL: @swap_rgb24_slices
5672
#[no_mangle]
5773
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
58-
// CHECK-NOT: alloca
59-
// CHECK: load <{{[0-9]+}} x i8>
60-
// CHECK: store <{{[0-9]+}} x i8>
74+
// CHECK-NOT: alloca
75+
// CHECK: load <{{[0-9]+}} x i8>
76+
// CHECK: store <{{[0-9]+}} x i8>
6177
if x.len() == y.len() {
6278
x.swap_with_slice(y);
6379
}
@@ -69,9 +85,9 @@ type RGBA32 = [u8; 4];
6985
// CHECK-LABEL: @swap_rgba32_slices
7086
#[no_mangle]
7187
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
72-
// CHECK-NOT: alloca
73-
// CHECK: load <{{[0-9]+}} x i32>
74-
// CHECK: store <{{[0-9]+}} x i32>
88+
// CHECK-NOT: alloca
89+
// CHECK: load <{{[0-9]+}} x i32>
90+
// CHECK: store <{{[0-9]+}} x i32>
7591
if x.len() == y.len() {
7692
x.swap_with_slice(y);
7793
}
@@ -84,10 +100,24 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
84100
// CHECK-LABEL: @swap_string_slices
85101
#[no_mangle]
86102
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
87-
// CHECK-NOT: alloca
88-
// CHECK: load <{{[0-9]+}} x i64>
89-
// CHECK: store <{{[0-9]+}} x i64>
103+
// CHECK-NOT: alloca
104+
// CHECK: load <{{[0-9]+}} x i64>
105+
// CHECK: store <{{[0-9]+}} x i64>
90106
if x.len() == y.len() {
91107
x.swap_with_slice(y);
92108
}
93109
}
110+
111+
#[repr(C, packed)]
112+
pub struct Packed {
113+
pub first: bool,
114+
pub second: usize,
115+
}
116+
117+
// CHECK-LABEL: @swap_packed_structs
118+
#[no_mangle]
119+
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
120+
// CHECK-NOT: alloca
121+
// CHECK: ret void
122+
swap(x, y)
123+
}

0 commit comments

Comments
 (0)