Skip to content

Commit bbb54f6

Browse files
sayantn authored and Amanieu committed
Added runtime detection
Expanded the cache size to 93 (we will need this in the near future). Fixed detection of VAES, GFNI and VPCLMULQDQ. Could not test with `cupid` because it does not support these features yet.
1 parent 86098df commit bbb54f6

File tree

4 files changed

+67
-21
lines changed

4 files changed

+67
-21
lines changed

crates/std_detect/src/detect/arch/x86.rs

+15
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,11 @@ features! {
7676
/// * `"avx512bf16"`
7777
/// * `"avx512vp2intersect"`
7878
/// * `"avx512fp16"`
79+
/// * `"avxvnni"`
80+
/// * `"avxifma"`
81+
/// * `"avxneconvert"`
82+
/// * `"avxvnniint8"`
83+
/// * `"avxvnniint16"`
7984
/// * `"f16c"`
8085
/// * `"fma"`
8186
/// * `"bmi1"`
@@ -172,6 +177,16 @@ features! {
172177
/// AVX-512 P2INTERSECT
173178
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512fp16: "avx512fp16";
174179
/// AVX-512 FP16 (FLOAT16 instructions)
180+
@FEATURE: #[unstable(feature = "avx512_target_feature", issue = "44839")] avxifma: "avxifma";
181+
/// AVX-IFMA (Integer Fused Multiply Add)
182+
@FEATURE: #[unstable(feature = "avx512_target_feature", issue = "44839")] avxneconvert: "avxneconvert";
183+
/// AVX-NE-CONVERT (Exceptionless Convert)
184+
@FEATURE: #[unstable(feature = "avx512_target_feature", issue = "44839")] avxvnni: "avxvnni";
185+
/// AVX-VNNI (Vector Neural Network Instructions)
186+
@FEATURE: #[unstable(feature = "avx512_target_feature", issue = "44839")] avxvnniint16: "avxvnniint16";
187+
/// AVX-VNNI_INT16 (VNNI with 16-bit integers)
188+
@FEATURE: #[unstable(feature = "avx512_target_feature", issue = "44839")] avxvnniint8: "avxvnniint8";
189+
/// AVX-VNNI_INT8 (VNNI with 8-bit integers)
175190
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c";
176191
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
177192
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma";

crates/std_detect/src/detect/cache.rs

+19-11
Original file line numberDiff line numberDiff line change
@@ -9,30 +9,30 @@ use core::sync::atomic::AtomicUsize;
99

1010
/// Sets the `bit` of `x`.
1111
#[inline]
12-
const fn set_bit(x: u64, bit: u32) -> u64 {
12+
const fn set_bit(x: u128, bit: u32) -> u128 {
1313
x | 1 << bit
1414
}
1515

1616
/// Tests the `bit` of `x`.
1717
#[inline]
18-
const fn test_bit(x: u64, bit: u32) -> bool {
18+
const fn test_bit(x: u128, bit: u32) -> bool {
1919
x & (1 << bit) != 0
2020
}
2121

2222
/// Unsets the `bit` of `x`.
2323
#[inline]
24-
const fn unset_bit(x: u64, bit: u32) -> u64 {
24+
const fn unset_bit(x: u128, bit: u32) -> u128 {
2525
x & !(1 << bit)
2626
}
2727

2828
/// Maximum number of features that can be cached.
29-
const CACHE_CAPACITY: u32 = 62;
29+
const CACHE_CAPACITY: u32 = 93;
3030

3131
/// This type is used to initialize the cache
3232
// The derived `Default` implementation will initialize the field to zero,
3333
// which is what we want.
3434
#[derive(Copy, Clone, Default)]
35-
pub(crate) struct Initializer(u64);
35+
pub(crate) struct Initializer(u128);
3636

3737
// NOTE: the `debug_assert!` would catch that we do not add more Features than
3838
// the one fitting our cache.
@@ -71,10 +71,15 @@ impl Initializer {
7171
}
7272

7373
/// This global variable is a cache of the features supported by the CPU.
74-
// Note: on x64, we only use the first slot
75-
static CACHE: [Cache; 2] = [Cache::uninitialized(), Cache::uninitialized()];
76-
77-
/// Feature cache with capacity for `size_of::<usize::MAX>() * 8 - 1` features.
74+
// Note: the third slot is only used in x86
75+
// Another Slot can be added if needed without any change to `Initializer`
76+
static CACHE: [Cache; 3] = [
77+
Cache::uninitialized(),
78+
Cache::uninitialized(),
79+
Cache::uninitialized(),
80+
];
81+
82+
/// Feature cache with capacity for `size_of::<usize>() * 8 - 1` features.
7883
///
7984
/// Note: 0 is used to represent an uninitialized cache, and (at least) the most
8085
/// significant bit is set on any cache which has been initialized.
@@ -102,7 +107,7 @@ impl Cache {
102107
if cached == 0 {
103108
None
104109
} else {
105-
Some(test_bit(cached as u64, bit))
110+
Some(test_bit(cached as u128, bit))
106111
}
107112
}
108113

@@ -173,6 +178,7 @@ cfg_if::cfg_if! {
173178
fn do_initialize(value: Initializer) {
174179
CACHE[0].initialize((value.0) as usize & Cache::MASK);
175180
CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK);
181+
CACHE[2].initialize((value.0 >> 2 * Cache::CAPACITY) as usize & Cache::MASK);
176182
}
177183

178184
// We only have to detect features once, and it's fairly costly, so hint to LLVM
@@ -205,8 +211,10 @@ fn detect_and_initialize() -> Initializer {
205211
pub(crate) fn test(bit: u32) -> bool {
206212
let (relative_bit, idx) = if bit < Cache::CAPACITY {
207213
(bit, 0)
208-
} else {
214+
} else if bit < 2 * Cache::CAPACITY {
209215
(bit - Cache::CAPACITY, 1)
216+
} else {
217+
(bit - 2 * Cache::CAPACITY, 2)
210218
};
211219
CACHE[idx]
212220
.test(relative_bit)

crates/std_detect/src/detect/os/x86.rs

+21-9
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,17 @@ pub(crate) fn detect_features() -> cache::Initializer {
7474
extended_features_ecx,
7575
extended_features_edx,
7676
extended_features_eax_leaf_1,
77+
extended_features_edx_leaf_1,
7778
) = if max_basic_leaf >= 7 {
7879
let CpuidResult { ebx, ecx, edx, .. } = unsafe { __cpuid(0x0000_0007_u32) };
79-
let CpuidResult { eax: eax_1, .. } =
80-
unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) };
81-
(ebx, ecx, edx, eax_1)
80+
let CpuidResult {
81+
eax: eax_1,
82+
edx: edx_1,
83+
..
84+
} = unsafe { __cpuid_count(0x0000_0007_u32, 0x0000_0001_u32) };
85+
(ebx, ecx, edx, eax_1, edx_1)
8286
} else {
83-
(0, 0, 0, 0) // CPUID does not support "Extended Features"
87+
(0, 0, 0, 0, 0) // CPUID does not support "Extended Features"
8488
};
8589

8690
// EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported
@@ -129,6 +133,10 @@ pub(crate) fn detect_features() -> cache::Initializer {
129133
enable(proc_info_edx, 26, Feature::sse2);
130134
enable(extended_features_ebx, 29, Feature::sha);
131135

136+
enable(extended_features_ecx, 8, Feature::gfni);
137+
enable(extended_features_ecx, 9, Feature::vaes);
138+
enable(extended_features_ecx, 10, Feature::vpclmulqdq);
139+
132140
enable(extended_features_ebx, 3, Feature::bmi1);
133141
enable(extended_features_ebx, 8, Feature::bmi2);
134142

@@ -165,8 +173,8 @@ pub(crate) fn detect_features() -> cache::Initializer {
165173
let xcr0 = unsafe { _xgetbv(0) };
166174
// Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`:
167175
let os_avx_support = xcr0 & 6 == 6;
168-
// Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`:
169-
let os_avx512_support = xcr0 & 224 == 224;
176+
// Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 0xe0`:
177+
let os_avx512_support = xcr0 & 0xe0 == 0xe0;
170178

171179
// Only if the OS and the CPU support saving/restoring the AVX
172180
// registers we enable `xsave` support:
@@ -203,6 +211,13 @@ pub(crate) fn detect_features() -> cache::Initializer {
203211
enable(proc_info_ecx, 28, Feature::avx);
204212
enable(extended_features_ebx, 5, Feature::avx2);
205213

214+
// "Short" versions of AVX512 instructions
215+
enable(extended_features_eax_leaf_1, 4, Feature::avxvnni);
216+
enable(extended_features_eax_leaf_1, 23, Feature::avxifma);
217+
enable(extended_features_edx_leaf_1, 4, Feature::avxvnniint8);
218+
enable(extended_features_edx_leaf_1, 5, Feature::avxneconvert);
219+
enable(extended_features_edx_leaf_1, 10, Feature::avxvnniint16);
220+
206221
// For AVX-512 the OS also needs to support saving/restoring
207222
// the extended state, only then we enable AVX-512 support:
208223
if os_avx512_support {
@@ -216,9 +231,6 @@ pub(crate) fn detect_features() -> cache::Initializer {
216231
enable(extended_features_ebx, 31, Feature::avx512vl);
217232
enable(extended_features_ecx, 1, Feature::avx512vbmi);
218233
enable(extended_features_ecx, 6, Feature::avx512vbmi2);
219-
enable(extended_features_ecx, 8, Feature::gfni);
220-
enable(extended_features_ecx, 9, Feature::vaes);
221-
enable(extended_features_ecx, 10, Feature::vpclmulqdq);
222234
enable(extended_features_ecx, 11, Feature::avx512vnni);
223235
enable(extended_features_ecx, 12, Feature::avx512bitalg);
224236
enable(extended_features_ecx, 14, Feature::avx512vpopcntdq);

crates/std_detect/tests/x86-specific.rs

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
22
#![allow(internal_features)]
3-
#![feature(stdarch_internal)]
3+
#![feature(stdarch_internal, avx512_target_feature)]
44

55
extern crate cupid;
66
#[macro_use]
@@ -68,6 +68,17 @@ fn dump() {
6868
println!("adx: {:?}", is_x86_feature_detected!("adx"));
6969
println!("rtm: {:?}", is_x86_feature_detected!("rtm"));
7070
println!("movbe: {:?}", is_x86_feature_detected!("movbe"));
71+
println!("avxvnni: {:?}", is_x86_feature_detected!("avxvnni"));
72+
println!("avxvnniint8: {:?}", is_x86_feature_detected!("avxvnniint8"));
73+
println!(
74+
"avxneconvert: {:?}",
75+
is_x86_feature_detected!("avxneconvert")
76+
);
77+
println!("avxifma: {:?}", is_x86_feature_detected!("avxifma"));
78+
println!(
79+
"avxvnniint16: {:?}",
80+
is_x86_feature_detected!("avxvnniint16")
81+
);
7182
}
7283

7384
#[cfg(feature = "std_detect_env_override")]

0 commit comments

Comments
 (0)