Skip to content

ARM NEON Intrinsics #1

Open
Open
@aloucks

Description

@aloucks

ARM/NEON intrinsics require nightly and not all are available in core::arch.

See also:

rust-lang/stdarch#148

https://doc.rust-lang.org/core/arch/aarch64/index.html

helper script

use regex::Regex;
use std::collections::BTreeMap;
use std::fs;
use std::process::Command;

/// Returns `true` when `name` occurs in `haystack` as a whole identifier,
/// i.e. the occurrence is not directly preceded or followed by an ASCII
/// alphanumeric character or `_`.
///
/// A plain substring test would report `vld1_f32` as present when only a
/// longer name such as `vld1_f32_x2` appears in the page.
fn contains_identifier(haystack: &str, name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    let is_ident = |c: char| c.is_ascii_alphanumeric() || c == '_';
    haystack.match_indices(name).any(|(start, matched)| {
        let before = haystack[..start].chars().next_back();
        let after = haystack[start + matched.len()..].chars().next();
        !before.map_or(false, is_ident) && !after.map_or(false, is_ident)
    })
}

/// Scans the DirectXMath headers for NEON-style intrinsic calls, downloads the
/// `core::arch::aarch64` documentation index with `curl`, and prints a Markdown
/// checklist showing which of the used intrinsics are named on that page.
///
/// Each checklist line has the form ``- [x] `name` (count)``, where `count` is
/// the number of call sites matched in the scanned files.
///
/// # Errors
///
/// Returns an error if the regex fails to compile, any input file cannot be
/// read, `curl` cannot be spawned or exits unsuccessfully, or the downloaded
/// `index.html` cannot be read.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Captures names of the form `v..._<f|u|i|s><bits>` that are immediately
    // followed by `(`. The type letter is a plain character class; the
    // original `[f|u|i|s]` also accepted a literal `|`.
    let r = Regex::new(r"(v[^\s|^\(]+_[fuis]\d+)\(")?;
    let filenames = [
        "../DirectXMath/Inc/DirectXMath.h",
        "../DirectXMath/Inc/DirectXMathVector.inl",
        "../DirectXMath/Inc/DirectXMathMatrix.inl",
        "../DirectXMath/Inc/DirectXMathMisc.inl",
        "../DirectXMath/Inc/DirectXMathConvert.inl",
    ];

    // BTreeMap keeps the report sorted by intrinsic name.
    let mut fn_names: BTreeMap<String, usize> = BTreeMap::new();
    for filename in &filenames {
        let text = fs::read_to_string(filename)
            .map_err(|e| format!("failed to read {}: {}", filename, e))?;
        for captures in r.captures_iter(&text) {
            if let Some(fn_name) = captures.get(1) {
                *fn_names.entry(fn_name.as_str().to_owned()).or_insert(0) += 1;
            }
        }
    }

    // `-O` stores the download under its remote name (`index.html`) in the
    // current directory, which is the file read back below.
    let output = Command::new("curl")
        .args(&[
            "-LORJ",
            "https://doc.rust-lang.org/core/arch/aarch64/index.html",
        ])
        .output()?;
    // Do not silently continue with a missing or stale `index.html`.
    if !output.status.success() {
        return Err(format!(
            "curl failed ({}): {}",
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        )
        .into());
    }

    let index = fs::read_to_string("index.html")
        .map_err(|e| format!("failed to read index.html: {}", e))?;

    let mut status = Vec::with_capacity(fn_names.len());
    let mut missing = 0;
    for (fn_name, count) in &fn_names {
        let exists = contains_identifier(&index, fn_name);
        if !exists {
            missing += 1;
        }
        status.push((fn_name, *count, exists));
    }

    println!(
        "### Neon Intrinsics ({}/{})",
        fn_names.len() - missing,
        fn_names.len()
    );
    for (fn_name, count, exists) in status {
        println!(
            "- [{}] `{}` ({})",
            if exists { "x" } else { " " },
            fn_name,
            count
        );
    }
    Ok(())
}

Neon Intrinsics (59/164)

  • vabs_f32 (1)
  • vabsq_f32 (20)
  • vacle_f32 (1)
  • vacleq_f32 (3)
  • vadd_f32 (15)
  • vaddq_f32 (25)
  • vaddq_s32 (2)
  • vand_u32 (3)
  • vandq_s32 (12)
  • vandq_u32 (58)
  • vbicq_s32 (1)
  • vbicq_u32 (10)
  • vbsl_f32 (16)
  • vbslq_f32 (65)
  • vbslq_s32 (6)
  • vceq_f32 (17)
  • vceq_u32 (3)
  • vceqq_f32 (29)
  • vceqq_s32 (7)
  • vceqq_u32 (20)
  • vcge_f32 (2)
  • vcgeq_f32 (12)
  • vcgt_f32 (3)
  • vcgtq_f32 (13)
  • vcgtq_s32 (9)
  • vcle_f32 (4)
  • vcleq_f32 (28)
  • vclt_f32 (1)
  • vcltq_f32 (9)
  • vcltq_s32 (3)
  • vcombine_f32 (100)
  • vcombine_s32 (2)
  • vcombine_u32 (9)
  • vcreate_f32 (7)
  • vcreate_s32 (2)
  • vcreate_u32 (6)
  • vcvt_f32_s32 (1)
  • vcvt_f32_u32 (1)
  • vcvt_s32_f32 (1)
  • vcvt_u32_f32 (1)
  • vcvtq_f32_s32 (13)
  • vcvtq_f32_u32 (3)
  • vcvtq_s32_f32 (7)
  • vcvtq_u32_f32 (4)
  • vdiv_f32 (1)
  • vdivq_f32 (24)
  • vdup_lane_f32 (14)
  • vdup_n_f32 (23)
  • vdup_n_s32 (1)
  • vdup_n_u32 (4)
  • vdupq_lane_f32 (101)
  • vdupq_n_f32 (39)
  • vdupq_n_s32 (3)
  • vdupq_n_u32 (8)
  • veorq_u32 (3)
  • vextq_f32 (27)
  • vfmaq_f32 (1)
  • vfmsq_f32 (1)
  • vget_high_f32 (146)
  • vget_high_u32 (1)
  • vget_high_u8 (41)
  • vget_lane_u32 (42)
  • vget_lane_u64 (16)
  • vget_low_f32 (218)
  • vget_low_s32 (1)
  • vget_low_u32 (16)
  • vget_low_u8 (41)
  • vgetq_lane_f32 (12)
  • vgetq_lane_u32 (4)
  • vld1_f32 (11)
  • vld1_lane_f32 (6)
  • vld1_lane_s32 (1)
  • vld1_lane_u32 (2)
  • vld1_s32 (2)
  • vld1_u32 (5)
  • vld1q_dup_f32 (1)
  • vld1q_dup_u32 (1)
  • vld1q_f32 (22)
  • vld1q_lane_f32 (5)
  • vld1q_lane_u32 (5)
  • vld1q_s32 (1)
  • vld1q_u32 (4)
  • vld2q_f32 (3)
  • vld3q_f32 (5)
  • vld4_f32 (2)
  • vld4q_f32 (1)
  • vmaxq_f32 (7)
  • vminq_f32 (3)
  • vmla_f32 (3)
  • vmlaq_f32 (107)
  • vmlaq_lane_f32 (148)
  • vmlaq_n_f32 (10)
  • vmls_f32 (3)
  • vmlsq_f32 (18)
  • vmul_f32 (69)
  • vmulq_f32 (139)
  • vmulq_lane_f32 (33)
  • vmulq_n_f32 (8)
  • vmvnq_u32 (7)
  • vneg_f32 (1)
  • vnegq_f32 (5)
  • vnegq_s32 (4)
  • vorrq_s32 (7)
  • vorrq_u32 (17)
  • vpadd_f32 (24)
  • vpaddq_f32 (2)
  • vrecpe_f32 (3)
  • vrecpeq_f32 (10)
  • vrecps_f32 (6)
  • vrecpsq_f32 (18)
  • vreinterpret_f32_u32 (2)
  • vreinterpret_f32_u8 (4)
  • vreinterpret_u16_u8 (84)
  • vreinterpret_u32_f32 (1)
  • vreinterpret_u32_u16 (42)
  • vreinterpret_u64_u32 (16)
  • vreinterpret_u8_f32 (6)
  • vreinterpret_u8_u32 (6)
  • vreinterpretq_f32_s32 (6)
  • vreinterpretq_f32_u32 (97)
  • vreinterpretq_s32_f32 (24)
  • vreinterpretq_s32_u32 (9)
  • vreinterpretq_u32_f32 (109)
  • vreinterpretq_u32_s32 (2)
  • vreinterpretq_u8_u32 (82)
  • vrev64_f32 (47)
  • vrev64q_f32 (2)
  • vrev64q_u32 (2)
  • vrndmq_f32 (1)
  • vrndnq_f32 (1)
  • vrndpq_f32 (1)
  • vrndq_f32 (1)
  • vrsqrte_f32 (19)
  • vrsqrteq_f32 (6)
  • vrsqrts_f32 (20)
  • vrsqrtsq_f32 (10)
  • vsetq_lane_f32 (60)
  • vsetq_lane_u32 (5)
  • vshlq_n_s32 (6)
  • vshlq_s32 (7)
  • vshrq_n_s32 (8)
  • vst1_f32 (10)
  • vst1_s32 (2)
  • vst1_u32 (6)
  • vst1q_f32 (27)
  • vst1q_lane_f32 (12)
  • vst1q_lane_s32 (1)
  • vst1q_lane_u32 (8)
  • vst1q_s32 (1)
  • vst1q_u32 (3)
  • vst2q_f32 (2)
  • vst3q_f32 (4)
  • vst4q_f32 (3)
  • vsub_f32 (1)
  • vsubq_f32 (46)
  • vsubq_s32 (9)
  • vtbl2_u8 (2)
  • vtbl4_u8 (2)
  • vtrnq_f32 (4)
  • vtss_f32 (4)
  • vuzpq_f32 (4)
  • vzip_u16 (42)
  • vzip_u8 (42)
  • vzipq_f32 (22)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions