@@ -106,9 +106,11 @@ pub(crate) fn detect_features() -> cache::Initializer {
106
106
{
107
107
// borrows value till the end of this scope:
108
108
let mut enable = |r, rb, f| {
109
- if bit:: test ( r as usize , rb) {
109
+ let present = bit:: test ( r as usize , rb) ;
110
+ if present {
110
111
value. set ( f as u32 ) ;
111
112
}
113
+ present
112
114
} ;
113
115
114
116
enable ( proc_info_ecx, 0 , Feature :: sse3) ;
@@ -120,7 +122,7 @@ pub(crate) fn detect_features() -> cache::Initializer {
120
122
enable ( proc_info_ecx, 22 , Feature :: movbe) ;
121
123
enable ( proc_info_ecx, 23 , Feature :: popcnt) ;
122
124
enable ( proc_info_ecx, 25 , Feature :: aes) ;
123
- enable ( proc_info_ecx, 29 , Feature :: f16c) ;
125
+ let f16c = enable ( proc_info_ecx, 29 , Feature :: f16c) ;
124
126
enable ( proc_info_ecx, 30 , Feature :: rdrand) ;
125
127
enable ( extended_features_ebx, 18 , Feature :: rdseed) ;
126
128
enable ( extended_features_ebx, 19 , Feature :: adx) ;
@@ -216,7 +218,7 @@ pub(crate) fn detect_features() -> cache::Initializer {
216
218
}
217
219
218
220
// FMA (uses 256-bit wide registers):
219
- enable ( proc_info_ecx, 12 , Feature :: fma) ;
221
+ let fma = enable ( proc_info_ecx, 12 , Feature :: fma) ;
220
222
221
223
// And AVX/AVX2:
222
224
enable ( proc_info_ecx, 28 , Feature :: avx) ;
@@ -235,7 +237,11 @@ pub(crate) fn detect_features() -> cache::Initializer {
235
237
236
238
// For AVX-512 the OS also needs to support saving/restoring
237
239
// the extended state; only then do we enable AVX-512 support:
238
- if os_avx512_support {
240
+ // Also, Rust makes `avx512f` imply `fma` and `f16c`, because
241
+ // otherwise the assembler is broken. But Intel doesn't guarantee
242
+ // that `fma` and `f16c` are available with `avx512f`, so we
243
+ // need to check for them separately.
244
+ if os_avx512_support && f16c && fma {
239
245
enable ( extended_features_ebx, 16 , Feature :: avx512f) ;
240
246
enable ( extended_features_ebx, 17 , Feature :: avx512dq) ;
241
247
enable ( extended_features_ebx, 21 , Feature :: avx512ifma) ;
0 commit comments