Skip to content

Commit c6eac51

Browse files
committed
increase feature cache size
1 parent 71a1316 commit c6eac51

File tree

7 files changed

+96
-40
lines changed

7 files changed

+96
-40
lines changed

coresimd/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
#![allow(unused_features)]
1414
#![feature(const_fn, link_llvm_intrinsics, platform_intrinsics, repr_simd,
1515
simd_ffi, target_feature, cfg_target_feature, i128_type, asm,
16-
const_atomic_usize_new, stmt_expr_attributes, core_intrinsics,
16+
integer_atomics, stmt_expr_attributes, core_intrinsics,
1717
crate_in_paths)]
1818
#![cfg_attr(test, feature(proc_macro, test, attr_literals))]
1919
#![cfg_attr(feature = "cargo-clippy",

coresimd/src/runtime/aarch64.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Run-time feature detection on ARM Aarch64.
2-
use runtime::bit;
32
use runtime::arch::HasFeature;
3+
use runtime::cache;
44

55
#[macro_export]
66
#[doc(hidden)]
@@ -32,12 +32,12 @@ pub enum __Feature {
3232
pmull,
3333
}
3434

35-
pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
36-
let mut value: usize = 0;
35+
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
36+
let mut value = cache::Initializer::new();
3737
{
3838
let mut enable_feature = |f| {
3939
if x.has_feature(&f) {
40-
value = bit::set(value, f as u32);
40+
value.set(f as u32);
4141
}
4242
};
4343
enable_feature(__Feature::asimd);

coresimd/src/runtime/arm.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! Run-time feature detection on ARM Aarch32.
2-
use runtime::bit;
2+
use runtime::cache;
33
use runtime::arch::HasFeature;
44

55
#[macro_export]
@@ -28,12 +28,12 @@ pub enum __Feature {
2828
pmull,
2929
}
3030

31-
pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
32-
let mut value: usize = 0;
31+
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
32+
let mut value = cache::Initializer::new();
3333
{
3434
let mut enable_feature = |f| {
3535
if x.has_feature(&f) {
36-
value = bit::set(value, f as u32);
36+
value.set(f as u32);
3737
}
3838
};
3939
enable_feature(__Feature::neon);

coresimd/src/runtime/bit.rs

-11
This file was deleted.

coresimd/src/runtime/cache.rs

+79-12
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,97 @@
1-
//! Cache of run-time feature detection
1+
//! Caches run-time feature detection so that it only needs to be computed
2+
//! once.
23
3-
use core::sync::atomic::{AtomicUsize, Ordering};
4+
use core::sync::atomic::{AtomicU64, Ordering};
45

5-
use super::bit;
6+
/// Sets the `bit` of `x`.
7+
pub const fn set_bit(x: u64, bit: u32) -> u64 {
8+
x | 1 << bit
9+
}
10+
11+
/// Tests the `bit` of `x`.
12+
pub const fn test_bit(x: u64, bit: u32) -> bool {
13+
x & (1 << bit) != 0
14+
}
15+
16+
/// Maximum number of features that can be cached.
17+
const CACHE_CAPACITY: u32 = 63;
18+
19+
/// This type is used to initialize the cache
20+
pub struct Initializer(u64);
21+
22+
impl Initializer {
23+
/// Creates a cleared cache.
24+
pub fn new() -> Self {
25+
Initializer(0)
26+
}
27+
/// Tests the `bit` of the cache.
28+
pub fn test(&self, bit: u32) -> bool {
29+
// FIXME: this way of making sure that the cache is large enough is
30+
// brittle.
31+
debug_assert!(
32+
bit < CACHE_CAPACITY,
33+
"too many features, time to increase the cache size!"
34+
);
35+
test_bit(self.0, bit)
36+
}
37+
/// Sets the `bit` of the cache.
38+
pub fn set(&mut self, bit: u32) {
39+
// FIXME: this way of making sure that the cache is large enough is
40+
// brittle.
41+
debug_assert!(
42+
bit < CACHE_CAPACITY,
43+
"too many features, time to increase the cache size!"
44+
);
45+
let v = self.0;
46+
self.0 = set_bit(v, bit);
47+
}
48+
}
649

7-
/// This global variable is a bitset used to cache the features supported by
8-
/// the
9-
/// CPU.
10-
static CACHE: AtomicUsize = AtomicUsize::new(usize::max_value());
50+
/// Feature cache with capacity for `CACHE_CAPACITY` features.
51+
///
52+
/// Note: the last feature bit is reserved: an all-ones value
53+
/// (`u64::max_value()`) represents an uninitialized cache.
54+
struct Cache(AtomicU64);
55+
56+
/// This global variable is a cache of the features supported by the CPU.
57+
static CACHE: Cache = Cache::uninitialized();
58+
59+
impl Cache {
60+
/// Creates an uninitialized cache.
61+
const fn uninitialized() -> Self {
62+
Cache(AtomicU64::new(u64::max_value()))
63+
}
64+
/// Is the cache uninitialized?
65+
pub fn is_uninitialized(&self) -> bool {
66+
self.0.load(Ordering::Relaxed) == u64::max_value()
67+
}
68+
69+
/// Is the `bit` in the cache set?
70+
pub fn test(&self, bit: u32) -> bool {
71+
test_bit(CACHE.0.load(Ordering::Relaxed), bit)
72+
}
73+
74+
pub fn set(&self, value: Initializer) {
75+
self.0.store(value.0, Ordering::Relaxed);
76+
}
77+
}
1178

1279
/// Test the `bit` of the storage. If the storage has not been initialized,
1380
/// initializes it with the result of `f()`.
1481
///
1582
/// On its first invocation, it detects the CPU features and caches them in the
16-
/// `FEATURES` global variable as an `AtomicUsize`.
83+
/// `CACHE` global variable as an `AtomicU64`.
1784
///
1885
/// It uses the `__Feature` variant to index into this variable as a bitset. If
1986
/// the bit is set, the feature is enabled, and otherwise it is disabled.
2087
///
2188
/// PLEASE: do not use this, it is an implementation detail subject to change.
2289
pub fn test<F>(bit: u32, f: F) -> bool
2390
where
24-
F: FnOnce() -> usize,
91+
F: FnOnce() -> Initializer,
2592
{
26-
if CACHE.load(Ordering::Relaxed) == usize::max_value() {
27-
CACHE.store(f(), Ordering::Relaxed);
93+
if CACHE.is_uninitialized() {
94+
CACHE.set(f());
2895
}
29-
bit::test(CACHE.load(Ordering::Relaxed), bit)
96+
CACHE.test(bit)
3097
}

coresimd/src/runtime/powerpc64.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
//! Run-time feature detection on PowerPC64.
2-
use runtime::bit;
2+
use runtime::cache;
33
use runtime::arch::HasFeature;
44

55
#[macro_export]
@@ -33,12 +33,12 @@ pub enum __Feature {
3333
power8,
3434
}
3535

36-
pub fn detect_features<T: HasFeature>(mut x: T) -> usize {
37-
let mut value: usize = 0;
36+
pub fn detect_features<T: HasFeature>(mut x: T) -> cache::Initializer {
37+
let mut value = cache::Initializer::new();
3838
{
3939
let mut enable_feature = |f| {
4040
if x.has_feature(&f) {
41-
value = bit::set(value, f as u32);
41+
value.set(f as u32);
4242
}
4343
};
4444
enable_feature(__Feature::altivec);

coresimd/src/runtime/x86.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
1919
use core::mem;
2020

21-
use super::bit;
21+
use super::{bit, cache};
2222

2323
/// This macro maps the string-literal feature names to values of the
2424
/// `__Feature` enum at compile-time. The feature names used are the same as
@@ -261,10 +261,10 @@ pub enum __Feature {
261261
/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID
262262
/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
263263
/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
264-
pub fn detect_features() -> usize {
264+
pub fn detect_features() -> cache::Initializer {
265265
use vendor::{__cpuid, __cpuid_count, has_cpuid, CpuidResult};
266266
use vendor::_xgetbv;
267-
let mut value: usize = 0;
267+
let mut value = cache::Initializer::new();
268268

269269
// If the x86 CPU does not support the CPUID instruction then it is too
270270
// old to support any of the currently-detectable features.
@@ -340,7 +340,7 @@ pub fn detect_features() -> usize {
340340
// borrows value till the end of this scope:
341341
let mut enable = |r, rb, f| {
342342
if bit::test(r as usize, rb) {
343-
value = bit::set(value, f as u32);
343+
value.set(f as u32);
344344
}
345345
};
346346

0 commit comments

Comments
 (0)