Skip to content

non-x86 runtime detection updates #229

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 12, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions src/runtime/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,11 @@ pub enum __Feature {
}

pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
let value: usize = 0;
let mut value: usize = 0;
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
bit::set(value, f as u32);
value = bit::set(value, f as u32);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing this! The x86 run-time did not have this issue; I wish there were an easy way to unify this code. https://github.com/rust-lang-nursery/stdsimd/blob/master/coresimd/src/runtime/x86.rs#L329

}
};
enable_feature(__Feature::asimd);
Expand All @@ -45,6 +45,25 @@ pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
value
}

/// Probe the ELF Auxiliary vector for hardware capabilities
///
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h
impl linux::FeatureQuery for linux::AuxVec {
    /// Reports whether the `AT::HWCAP` word of the auxiliary vector has the
    /// bit for feature `x` set; a missing `AT::HWCAP` entry yields `false`.
    fn has_feature(&mut self, x: &__Feature) -> bool {
        use self::__Feature::*;
        // Position of the feature's bit inside the HWCAP word.
        let bit: u32 = match *x {
            asimd => 1,
            pmull => 4,
        };
        self.lookup(linux::AT::HWCAP)
            .map(|caps| caps & (1 << bit) != 0)
            .unwrap_or(false)
    }
}

impl linux::FeatureQuery for linux::CpuInfo {
fn has_feature(&mut self, x: &__Feature) -> bool {
use self::__Feature::*;
Expand Down
23 changes: 21 additions & 2 deletions src/runtime/arm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ pub enum __Feature {
}

pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
let value: usize = 0;
let mut value: usize = 0;
{
let mut enable_feature = |f| {
if x.has_feature(&f) {
bit::set(value, f as u32);
value = bit::set(value, f as u32);
}
};
enable_feature(__Feature::neon);
Expand All @@ -42,6 +42,25 @@ pub fn detect_features<T: linux::FeatureQuery>(mut x: T) -> usize {
value
}

/// Probe the ELF Auxiliary vector for hardware capabilities
///
/// The values are part of the platform-specific [asm/hwcap.h][hwcap]
///
/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h
impl linux::FeatureQuery for linux::AuxVec {
    /// Reports whether the auxiliary vector advertises feature `x`.
    ///
    /// `neon` is looked up in the `AT::HWCAP` word and `pmull` in the
    /// `AT::HWCAP2` word; a missing entry yields `false`.
    fn has_feature(&mut self, x: &__Feature) -> bool {
        use self::__Feature::*;
        // Which auxiliary-vector word carries the feature, and at which bit.
        let (key, bit): (linux::AT, u32) = match *x {
            neon => (linux::AT::HWCAP, 12),
            pmull => (linux::AT::HWCAP2, 1),
        };
        self.lookup(key)
            .map(|caps| caps & (1 << bit) != 0)
            .unwrap_or(false)
    }
}

/// Is the CPU known to have a broken NEON unit?
///
/// See https://crbug.com/341598.
Expand Down
89 changes: 89 additions & 0 deletions src/runtime/linux/auxvec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
//! Reads /proc/self/auxv on Linux systems

use std::prelude::v1::*;
use std::slice;
use std::mem;

/// Simple abstraction for the ELF Auxiliary Vector
///
/// `elf.h` describes the layout of a single entry as `auxv_t`.
/// Desugared, an entry is a `usize` tag followed by a union with
/// the same storage size.
///
/// Only the `HWCAP` and `HWCAP2` entries are cached.
#[derive(Debug)]
pub struct AuxVec {
    hwcap: Option<usize>,
    hwcap2: Option<usize>,
}

/// ELF Auxiliary vector entry types
///
/// The entry types are specified in [linux/auxvec.h][auxvec_h].
///
/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
#[derive(Clone, Debug, PartialEq)]
#[allow(dead_code)]
pub enum AT {
    /// CPU Hardware capabilities, it is a bitfield.
    HWCAP = 16,
    /// CPU Hardware capabilities, additional bitfield.
    HWCAP2 = 26,
}

impl AuxVec {
    /// Reads the ELF Auxiliary Vector
    ///
    /// Tries to read `/proc/self/auxv`.
    ///
    /// # Errors
    ///
    /// Returns the underlying I/O error when the file cannot be opened or
    /// read.
    // TODO: Make use of getauxval once it is available in a
    // reliable way.
    // NOTE(review): a reviewer reports that on Android this file is commonly
    // not readable (permissions-wise), while getauxval() is widely available
    // because bionic has implemented it for some years.
    pub fn new() -> Result<Self, ::std::io::Error> {
        use std::io::Read;
        let mut file = ::std::fs::File::open("/proc/self/auxv")?;

        let mut buf = [0usize; 64];
        let word = mem::size_of::<usize>();

        let filled = {
            // SAFETY: `buf` is a live, properly aligned `usize` array and the
            // byte view covers exactly `buf.len() * size_of::<usize>()` bytes
            // of it. Every bit pattern is valid for both `u8` and `usize`,
            // and the view is dropped at the end of this scope, before `buf`
            // is read again.
            let raw = unsafe {
                slice::from_raw_parts_mut(
                    buf.as_mut_ptr() as *mut u8,
                    buf.len() * word,
                )
            };

            // A single `read` may return fewer bytes than are available, so
            // keep reading until end-of-file or until the buffer is full.
            let mut filled = 0;
            while filled < raw.len() {
                match file.read(&mut raw[filled..]) {
                    Ok(0) => break,
                    Ok(n) => filled += n,
                    Err(ref e)
                        if e.kind()
                            == ::std::io::ErrorKind::Interrupted => {}
                    Err(e) => return Err(e),
                }
            }
            filled
        };

        // Only hand over the words that were read completely.
        Ok(Self::from_entries(&buf[..filled / word]))
    }

    /// Extracts the cached entries from raw `(tag, value)` word pairs.
    ///
    /// Scanning stops at the `AT_NULL` terminator (tag `0`); a trailing tag
    /// without a value word is ignored.
    fn from_entries(words: &[usize]) -> Self {
        let mut auxv = AuxVec { hwcap: None, hwcap2: None };

        for el in words.chunks(2) {
            if el.len() < 2 || el[0] == 0 {
                break;
            }
            if el[0] == AT::HWCAP as usize {
                auxv.hwcap = Some(el[1]);
            } else if el[0] == AT::HWCAP2 as usize {
                auxv.hwcap2 = Some(el[1]);
            }
        }

        auxv
    }

    /// Returns the value for the AT key
    pub fn lookup(&self, key: AT) -> Option<usize> {
        match key {
            AT::HWCAP => self.hwcap,
            AT::HWCAP2 => self.hwcap2,
        }
    }
}

#[cfg(test)]
mod tests {
use super::*;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if it's easily possible, but we should try to refactor this a bit so that we can test it by adding one or two dumps of auxv for ARM CPUs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not have an arm system handy right now, could somebody else do that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah just open an issue and tag it here. Otherwise you can also try googling for dumps of auxv, maybe you get lucky :D

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually it won't work since the auxv entry size is system dependent so the synthetic 32bit tests would fail on 64bit platforms.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could use #[cfg(target_pointer_width = "32")] to enable the tests on 32-bit platforms or to add different tests for 32 and 64 bit platforms.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My idea is to avoid getauxval completely and just walk from argv in imp::init() and save a GLOBAL_AUXV_PTR there, then wrap the whole thing as it is done for args.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before we go to the extent of bypassing getauxval() and doing our own unsafe version, I think it would make sense to evaluate whether or not there is actually a problem with getauxval() availability in practice. Are people going to be writing Rust code that uses SIMD that gets deployed on very old Android devices? I suspect that that is likely to be a marginal use case at most.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I assume that the getauxval() support is sorted out already, and I looked at how hard it would be to avoid it in order to support the fringe cases I mentioned.

Apparently it would be within the 50-lines range.

Copy link
Contributor

@gnzlbg gnzlbg Dec 15, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lu-zero I thought that the init call was only available in std, is it also usable from core?


@marshallpierce once getauxval lands, things will be, in my opinion, very good for Linux users, and my time is better spent on improving things for macOS users (macOS x86 users are fine already, but iOS users on ARM chips are not). Having said this, if someone wants to spearhead support for libcs without getauxval in coresimd, I'd completely support that (for steed users it might be better to just implement getauxval in steed, though). Then there are also arm-thumb users, who are currently left a bit in the dark; we might be able to help them in coresimd, but it is unclear to me what the best way to do that is (probably offer something like what we offer for x86).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lu-zero if you want to pursue this, a good way to get initial feedback is to file an issue with your thoughts on the matter. We are kind of mixing many different discussions in this PR, and while many are subscribed to the issues in this repo, most of them are probably not being notified about any of this.

/// Smoke test: builds an `AuxVec` for the running process and prints the
/// `HWCAP` lookup and the whole structure; it only fails if `new` fails.
#[cfg(target_os = "linux")]
#[test]
fn test_auxvec_linux() {
    let vector = AuxVec::new().unwrap();
    let hwcap = vector.lookup(AT::HWCAP);
    println!("{:?}", hwcap);
    println!("{:?}", vector);
}
}
85 changes: 85 additions & 0 deletions src/runtime/linux/cpuinfo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,4 +214,89 @@ CPU revision : 1";
assert!(cpuinfo.field("Features").has("neon"));
assert!(cpuinfo.field("Features").has("asimd"));
}

/// `/proc/cpuinfo`-style fixture: four POWER8E processors whose `cpu` lines
/// advertise altivec; the `machine` field says it is emulated by qemu.
const POWER8E_POWERKVM: &str = r"processor : 0
cpu : POWER8E (raw), altivec supported
clock : 3425.000000MHz
revision : 2.1 (pvr 004b 0201)

processor : 1
cpu : POWER8E (raw), altivec supported
clock : 3425.000000MHz
revision : 2.1 (pvr 004b 0201)

processor : 2
cpu : POWER8E (raw), altivec supported
clock : 3425.000000MHz
revision : 2.1 (pvr 004b 0201)

processor : 3
cpu : POWER8E (raw), altivec supported
clock : 3425.000000MHz
revision : 2.1 (pvr 004b 0201)

timebase : 512000000
platform : pSeries
model : IBM pSeries (emulated by qemu)
machine : CHRP IBM pSeries (emulated by qemu)";

/// The POWER8E fixture exposes its `cpu` field verbatim and that field
/// reports altivec.
#[test]
fn test_cpuinfo_linux_power8_powerkvm() {
    let parsed = CpuInfo::from_str(POWER8E_POWERKVM).unwrap();

    let expected_cpu = "POWER8E (raw), altivec supported";
    assert_eq!(parsed.field("cpu"), expected_cpu);
    assert!(parsed.field("cpu").has("altivec"));
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks great, thanks! Could you add a second test for a CPU without altivec, like for example this one: https://github.com/randombit/cpuinfo/blob/master/ppc/power5 ?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lu-zero it looks like the Power5 test got lost in the merge?


/// `/proc/cpuinfo`-style fixture: eight POWER5+ processors whose `cpu` lines
/// do not mention altivec.
const POWER5P: &str = r"processor : 0
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 1
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 2
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 3
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 4
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 5
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 6
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

processor : 7
cpu : POWER5+ (gs)
clock : 1900.098000MHz
revision : 2.1 (pvr 003b 0201)

timebase : 237331000
platform : pSeries
machine : CHRP IBM,9133-55A";

/// The POWER5+ fixture exposes its `cpu` field verbatim and that field does
/// not report altivec.
#[test]
fn test_cpuinfo_linux_power5p() {
    let parsed = CpuInfo::from_str(POWER5P).unwrap();

    let expected_cpu = "POWER5+ (gs)";
    assert_eq!(parsed.field("cpu"), expected_cpu);
    assert!(!parsed.field("cpu").has("altivec"));
}
}
14 changes: 12 additions & 2 deletions src/runtime/linux/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
mod cpuinfo;
pub use self::cpuinfo::CpuInfo;

mod auxvec;
pub use self::auxvec::*;

use super::__Feature;

pub trait FeatureQuery {
Expand All @@ -17,11 +20,18 @@ fn detect_features_impl<T: FeatureQuery>(x: T) -> usize {
{
super::aarch64::detect_features(x)
}
#[cfg(target_arch = "powerpc64")]
{
super::powerpc64::detect_features(x)
}
}

/// Detects ARM features:
/// Detects CPU features:
pub fn detect_features() -> usize {
// FIXME: use libc::getauxval, and if that fails /proc/auxv
// Try to read the ELF Auxiliary Vector
if let Ok(v) = auxvec::AuxVec::new() {
return detect_features_impl(v);
}
// Try to read /proc/cpuinfo
if let Ok(v) = cpuinfo::CpuInfo::new() {
return detect_features_impl(v);
Expand Down
10 changes: 8 additions & 2 deletions src/runtime/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@ mod aarch64;
#[cfg(all(target_arch = "aarch64", target_os = "linux"))]
pub use self::aarch64::__Feature;

#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
#[macro_use]
mod powerpc64;
#[cfg(all(target_arch = "powerpc64", target_os = "linux"))]
pub use self::powerpc64::__Feature;

#[cfg(all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64")))]
any(target_arch = "arm", target_arch = "aarch64", target_arch = "powerpc64")))]
mod linux;

#[cfg(all(target_os = "linux",
any(target_arch = "arm", target_arch = "aarch64")))]
any(target_arch = "arm", target_arch = "aarch64", target_arch = "powerpc64")))]
pub use self::linux::detect_features;

/// Performs run-time feature detection.
Expand Down
Loading