Skip to content

Commit 604e677

Browse files
committed
add vldx neon instructions
1 parent 435f54a commit 604e677

File tree

7 files changed

+9032
-6915
lines changed

7 files changed

+9032
-6915
lines changed

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4514,6 +4514,84 @@ pub unsafe fn vqaddd_s64(a: i64, b: i64) -> i64 {
45144514
vqaddd_s64_(a, b)
45154515
}
45164516

4517+
/// Load multiple single-element structures to one, two, three, or four registers
4518+
#[inline]
4519+
#[target_feature(enable = "neon")]
4520+
#[cfg_attr(test, assert_instr(ld1))]
4521+
pub unsafe fn vld1_f64_x2(a: *const f64) -> float64x1x2_t {
4522+
#[allow(improper_ctypes)]
4523+
extern "unadjusted" {
4524+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v1f64.p0f64")]
4525+
fn vld1_f64_x2_(a: *const f64) -> float64x1x2_t;
4526+
}
4527+
vld1_f64_x2_(a)
4528+
}
4529+
4530+
/// Load multiple single-element structures to one, two, three, or four registers
4531+
#[inline]
4532+
#[target_feature(enable = "neon")]
4533+
#[cfg_attr(test, assert_instr(ld1))]
4534+
pub unsafe fn vld1q_f64_x2(a: *const f64) -> float64x2x2_t {
4535+
#[allow(improper_ctypes)]
4536+
extern "unadjusted" {
4537+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2f64.p0f64")]
4538+
fn vld1q_f64_x2_(a: *const f64) -> float64x2x2_t;
4539+
}
4540+
vld1q_f64_x2_(a)
4541+
}
4542+
4543+
/// Load multiple single-element structures to one, two, three, or four registers
4544+
#[inline]
4545+
#[target_feature(enable = "neon")]
4546+
#[cfg_attr(test, assert_instr(ld1))]
4547+
pub unsafe fn vld1_f64_x3(a: *const f64) -> float64x1x3_t {
4548+
#[allow(improper_ctypes)]
4549+
extern "unadjusted" {
4550+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1f64.p0f64")]
4551+
fn vld1_f64_x3_(a: *const f64) -> float64x1x3_t;
4552+
}
4553+
vld1_f64_x3_(a)
4554+
}
4555+
4556+
/// Load multiple single-element structures to one, two, three, or four registers
4557+
#[inline]
4558+
#[target_feature(enable = "neon")]
4559+
#[cfg_attr(test, assert_instr(ld1))]
4560+
pub unsafe fn vld1q_f64_x3(a: *const f64) -> float64x2x3_t {
4561+
#[allow(improper_ctypes)]
4562+
extern "unadjusted" {
4563+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2f64.p0f64")]
4564+
fn vld1q_f64_x3_(a: *const f64) -> float64x2x3_t;
4565+
}
4566+
vld1q_f64_x3_(a)
4567+
}
4568+
4569+
/// Load multiple single-element structures to one, two, three, or four registers
4570+
#[inline]
4571+
#[target_feature(enable = "neon")]
4572+
#[cfg_attr(test, assert_instr(ld1))]
4573+
pub unsafe fn vld1_f64_x4(a: *const f64) -> float64x1x4_t {
4574+
#[allow(improper_ctypes)]
4575+
extern "unadjusted" {
4576+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1f64.p0f64")]
4577+
fn vld1_f64_x4_(a: *const f64) -> float64x1x4_t;
4578+
}
4579+
vld1_f64_x4_(a)
4580+
}
4581+
4582+
/// Load multiple single-element structures to one, two, three, or four registers
4583+
#[inline]
4584+
#[target_feature(enable = "neon")]
4585+
#[cfg_attr(test, assert_instr(ld1))]
4586+
pub unsafe fn vld1q_f64_x4(a: *const f64) -> float64x2x4_t {
4587+
#[allow(improper_ctypes)]
4588+
extern "unadjusted" {
4589+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2f64.p0f64")]
4590+
fn vld1q_f64_x4_(a: *const f64) -> float64x2x4_t;
4591+
}
4592+
vld1q_f64_x4_(a)
4593+
}
4594+
45174595
/// Multiply
45184596
#[inline]
45194597
#[target_feature(enable = "neon")]
@@ -12857,6 +12935,54 @@ mod test {
1285712935
assert_eq!(r, e);
1285812936
}
1285912937

12938+
#[simd_test(enable = "neon")]
12939+
unsafe fn test_vld1_f64_x2() {
12940+
let a: [f64; 3] = [0., 1., 2.];
12941+
let e: [f64; 2] = [1., 2.];
12942+
let r: [f64; 2] = transmute(vld1_f64_x2(a[1..].as_ptr()));
12943+
assert_eq!(r, e);
12944+
}
12945+
12946+
#[simd_test(enable = "neon")]
12947+
unsafe fn test_vld1q_f64_x2() {
12948+
let a: [f64; 5] = [0., 1., 2., 3., 4.];
12949+
let e: [f64x2; 2] = [f64x2::new(1., 2.), f64x2::new(3., 4.)];
12950+
let r: [f64x2; 2] = transmute(vld1q_f64_x2(a[1..].as_ptr()));
12951+
assert_eq!(r, e);
12952+
}
12953+
12954+
#[simd_test(enable = "neon")]
12955+
unsafe fn test_vld1_f64_x3() {
12956+
let a: [f64; 4] = [0., 1., 2., 3.];
12957+
let e: [f64; 3] = [1., 2., 3.];
12958+
let r: [f64; 3] = transmute(vld1_f64_x3(a[1..].as_ptr()));
12959+
assert_eq!(r, e);
12960+
}
12961+
12962+
#[simd_test(enable = "neon")]
12963+
unsafe fn test_vld1q_f64_x3() {
12964+
let a: [f64; 7] = [0., 1., 2., 3., 4., 5., 6.];
12965+
let e: [f64x2; 3] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.)];
12966+
let r: [f64x2; 3] = transmute(vld1q_f64_x3(a[1..].as_ptr()));
12967+
assert_eq!(r, e);
12968+
}
12969+
12970+
#[simd_test(enable = "neon")]
12971+
unsafe fn test_vld1_f64_x4() {
12972+
let a: [f64; 5] = [0., 1., 2., 3., 4.];
12973+
let e: [f64; 4] = [1., 2., 3., 4.];
12974+
let r: [f64; 4] = transmute(vld1_f64_x4(a[1..].as_ptr()));
12975+
assert_eq!(r, e);
12976+
}
12977+
12978+
#[simd_test(enable = "neon")]
12979+
unsafe fn test_vld1q_f64_x4() {
12980+
let a: [f64; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.];
12981+
let e: [f64x2; 4] = [f64x2::new(1., 2.), f64x2::new(3., 4.), f64x2::new(5., 6.), f64x2::new(7., 8.)];
12982+
let r: [f64x2; 4] = transmute(vld1q_f64_x4(a[1..].as_ptr()));
12983+
assert_eq!(r, e);
12984+
}
12985+
1286012986
#[simd_test(enable = "neon")]
1286112987
unsafe fn test_vmul_f64() {
1286212988
let a: f64 = 1.0;

crates/core_arch/src/aarch64/neon/mod.rs

Lines changed: 12 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -25,45 +25,25 @@ types! {
2525
pub struct float64x2_t(f64, f64);
2626
}
2727

28-
/// ARM-specific type containing two `int8x16_t` vectors.
28+
/// ARM-specific type containing two `float64x1_t` vectors.
2929
#[derive(Copy, Clone)]
30-
pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t);
31-
/// ARM-specific type containing three `int8x16_t` vectors.
30+
pub struct float64x1x2_t(pub float64x1_t, pub float64x1_t);
31+
/// ARM-specific type containing three `float64x1_t` vectors.
3232
#[derive(Copy, Clone)]
33-
pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t);
34-
/// ARM-specific type containing four `int8x16_t` vectors.
33+
pub struct float64x1x3_t(pub float64x1_t, pub float64x1_t, pub float64x1_t);
34+
/// ARM-specific type containing four `float64x1_t` vectors.
3535
#[derive(Copy, Clone)]
36-
pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t);
36+
pub struct float64x1x4_t(pub float64x1_t, pub float64x1_t, pub float64x1_t, pub float64x1_t);
3737

38-
/// ARM-specific type containing two `uint8x16_t` vectors.
38+
/// ARM-specific type containing two `float64x2_t` vectors.
3939
#[derive(Copy, Clone)]
40-
pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t);
41-
/// ARM-specific type containing three `uint8x16_t` vectors.
40+
pub struct float64x2x2_t(pub float64x2_t, pub float64x2_t);
41+
/// ARM-specific type containing three `float64x2_t` vectors.
4242
#[derive(Copy, Clone)]
43-
pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t);
44-
/// ARM-specific type containing four `uint8x16_t` vectors.
43+
pub struct float64x2x3_t(pub float64x2_t, pub float64x2_t, pub float64x2_t);
44+
/// ARM-specific type containing four `float64x2_t` vectors.
4545
#[derive(Copy, Clone)]
46-
pub struct uint8x16x4_t(
47-
pub uint8x16_t,
48-
pub uint8x16_t,
49-
pub uint8x16_t,
50-
pub uint8x16_t,
51-
);
52-
53-
/// ARM-specific type containing two `poly8x16_t` vectors.
54-
#[derive(Copy, Clone)]
55-
pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t);
56-
/// ARM-specific type containing three `poly8x16_t` vectors.
57-
#[derive(Copy, Clone)]
58-
pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t);
59-
/// ARM-specific type containing four `poly8x16_t` vectors.
60-
#[derive(Copy, Clone)]
61-
pub struct poly8x16x4_t(
62-
pub poly8x16_t,
63-
pub poly8x16_t,
64-
pub poly8x16_t,
65-
pub poly8x16_t,
66-
);
46+
pub struct float64x2x4_t(pub float64x2_t, pub float64x2_t, pub float64x2_t, pub float64x2_t);
6747

6848
#[allow(improper_ctypes)]
6949
extern "C" {

0 commit comments

Comments
 (0)