Skip to content

Commit 2c621d0

Browse files
committed
optimize memcpy for ARM
Just as was done in the previous commit, memcpy is now written in assembly. Measurements below.

| Bytes | HEAD | this PR | diff      |
| ----- | ---- | ------- | --------- |
| 0     | 6    | 10      | +66.6667% |
| 1     | 12   | 16      | +33.3333% |
| 2     | 18   | 24      | +33.3333% |
| 3     | 24   | 32      | +33.3333% |
| 4     | 30   | 20      | -33.3333% |
| 16    | 102  | 50      | -50.9804% |
| 64    | 390  | 170     | -56.4103% |
| 256   | 1796 | 650     | -63.8085% |
| 1024  | 7172 | 2570    | -64.1662% |

All times are in clock cycles. The measurements were done on a Cortex-M3 processor running at 8 MHz.
1 parent 513e246 commit 2c621d0

File tree

2 files changed

+112
-13
lines changed

2 files changed

+112
-13
lines changed

src/arm.rs

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use core::intrinsics;
22

33
#[cfg(feature = "mem")]
4-
use mem::{memcpy, memmove, memset};
4+
use mem::{memmove, memset};
55

66
// NOTE This function and the ones below are implemented using assembly because they use a custom
77
// calling convention which can't be implemented using a normal Rust function
@@ -209,18 +209,48 @@ __aeabi_memset8:
209209
5: bx lr
210210
"#);
211211

212-
#[cfg_attr(not(test), no_mangle)]
213-
pub unsafe extern "aapcs" fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize) {
214-
memcpy(dest, src, n);
215-
}
216-
#[cfg_attr(not(test), no_mangle)]
217-
pub unsafe extern "aapcs" fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize) {
218-
memcpy(dest, src, n);
219-
}
220-
#[cfg_attr(not(test), no_mangle)]
221-
pub unsafe extern "aapcs" fn __aeabi_memcpy8(dest: *mut u8, src: *const u8, n: usize) {
222-
memcpy(dest, src, n);
223-
}
212+
// Assembly optimized memcpy{,4,8}
213+
global_asm!(r"
214+
@ fn __aeabi_memcpy(r0: *mut u8, r1: *const u8, r2: usize)
215+
.global __aeabi_memcpy
216+
217+
@ fn __aeabi_memcpy4(r0: *mut u8, r1: *const u8, r2: usize)
218+
@ r0 and r1 are 4-byte aligned
219+
.global __aeabi_memcpy4
220+
221+
@ fn __aeabi_memcpy8(r0: *mut u8, r1: *const u8, r2: usize)
222+
@ r0 and r1 are 8-byte aligned
223+
.global __aeabi_memcpy8
224+
225+
__aeabi_memcpy4:
226+
__aeabi_memcpy8:
227+
@ word-wise copy loop
228+
@ branch to '__aeabi_memcpy' if r2 < 4
229+
cmp r2, #4
230+
blt __aeabi_memcpy
231+
232+
ldr r3, [r1]
233+
adds r1, 4
234+
str r3, [r0]
235+
adds r0, 4
236+
subs r2, r2, #4
237+
b __aeabi_memcpy4
238+
239+
__aeabi_memcpy:
240+
@ branch to '2' if r2 == 0
241+
cmp r2, #0
242+
beq 2f
243+
244+
1: @ byte-wise copy loop
245+
ldrb r3, [r1]
246+
adds r1, #1
247+
strb r3, [r0]
248+
adds r0, #1
249+
subs r2, r2, #1
250+
bne 1b
251+
252+
2: bx lr
253+
"#);
224254

225255
#[cfg_attr(not(test), no_mangle)]
226256
pub unsafe extern "aapcs" fn __aeabi_memmove(dest: *mut u8, src: *const u8, n: usize) {

tests/aeabi_memcpy.rs

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#![cfg(all(target_arch = "arm",
2+
not(any(target_env = "gnu", target_env = "musl")),
3+
target_os = "linux",
4+
feature = "mem"))]
5+
#![feature(compiler_builtins_lib)]
6+
#![no_std]
7+
8+
extern crate compiler_builtins;
9+
10+
// test runner
11+
extern crate utest_cortex_m_qemu;
12+
13+
// overrides `panic!`
14+
#[macro_use]
15+
extern crate utest_macros;
16+
17+
macro_rules! panic {
18+
($($tt:tt)*) => {
19+
upanic!($($tt)*);
20+
};
21+
}
22+
23+
extern "C" {
24+
fn __aeabi_memcpy(dest: *mut u8, src: *const u8, n: usize);
25+
fn __aeabi_memcpy4(dest: *mut u8, src: *const u8, n: usize);
26+
}
27+
28+
struct Aligned {
29+
array: [u8; 8],
30+
_alignment: [u32; 0],
31+
}
32+
33+
impl Aligned {
34+
fn new(array: [u8; 8]) -> Self {
35+
Aligned {
36+
array: array,
37+
_alignment: [],
38+
}
39+
}
40+
}
41+
42+
#[test]
43+
fn memcpy() {
44+
let mut dest = [0; 4];
45+
let src = [0xde, 0xad, 0xbe, 0xef];
46+
47+
for n in 0..dest.len() {
48+
dest.copy_from_slice(&[0; 4]);
49+
50+
unsafe { __aeabi_memcpy(dest.as_mut_ptr(), src.as_ptr(), n) }
51+
52+
assert_eq!(&dest[0..n], &src[0..n])
53+
}
54+
}
55+
56+
#[test]
57+
fn memcpy4() {
58+
let mut aligned = Aligned::new([0; 8]);
59+
let dest = &mut aligned.array;
60+
let src = [0xde, 0xad, 0xbe, 0xef, 0xba, 0xad, 0xf0, 0x0d];
61+
62+
for n in 0..dest.len() {
63+
dest.copy_from_slice(&[0; 8]);
64+
65+
unsafe { __aeabi_memcpy4(dest.as_mut_ptr(), src.as_ptr(), n) }
66+
67+
assert_eq!(&dest[0..n], &src[0..n])
68+
}
69+
}

0 commit comments

Comments
 (0)