@@ -162,80 +162,102 @@ fn inv_test_bit(v: usize, idx: u32) -> bool {
162162/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
163163/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
164164fn detect_features ( ) -> usize {
    // Builds and returns a `usize` bitset of detected CPU features: runs `cpuid`
    // twice (EAX=1 and EAX=7, both with ECX=0) and, for every bit test that
    // passes, sets the bit numbered `__Feature::<name> as u32` via `set_bit`.
    // On the `+` side of this diff, avx and avx2 are only reported when the
    // XSAVE/OSXSAVE bits pass and `xgetbv(0) & 6 == 6`.
    // `inv_test_bit` and `set_bit` are defined outside this view; presumably
    // they test / set bit `idx` of a `usize` — verify against their definitions.
165- let ebx ;
166- let ecx ;
167- let edx ;
165+ let extended_features_ebx ;
166+ let proc_info_ecx ;
167+ let proc_info_edx ;
168168
169169 unsafe {
170170 /// To obtain all feature flags we need two CPUID queries:
171171
172172 /// 1. EAX=1, ECX=0: Queries "Processor Info and Feature Bits"
173173 /// This gives us most of the CPU features in ECX and EDX (see
174- /// below),
174+ /// below).
175175 asm ! ( "cpuid"
176- : "={ecx}" ( ecx ) , "={edx}" ( edx )
176+ : "={ecx}" ( proc_info_ecx ) , "={edx}" ( proc_info_edx )
177177 : "{eax}" ( 0x00000001u32 ) , "{ecx}" ( 0 as u32 )
178178 : : ) ; // NOTE(review): clobber list is empty, yet `cpuid` also writes EAX and EBX — confirm the constraints are sufficient.
179179
180180 /// 2. EAX=7, ECX=0: Queries "Extended Features"
181181 /// This gives us information about bmi, bmi2, and avx2 support
182- /// (see below).
182+ /// (see below); the result in ECX is not currently needed.
183183 asm ! ( "cpuid"
184- : "={ebx}" ( ebx )
184+ : "={ebx}" ( extended_features_ebx )
185185 : "{eax}" ( 0x00000007u32 ) , "{ecx}" ( 0 as u32 ) // NOTE(review): leaf 7 is queried without first checking the maximum supported leaf (CPUID EAX=0) — confirm.
186186 : : ) ; // NOTE(review): clobber list is empty, yet `cpuid` also writes EAX, ECX and EDX — confirm the constraints are sufficient.
187187 }
188188
189189 let mut value: usize = 0 ; // accumulated feature bitset; this is the function's return value
190190
191- // CPUID call with EAX=7, ECX=0 => Extended Features in EBX and ECX
192- // (the result in ECX is not currently needed):
193- if inv_test_bit ( ebx, 3 ) {
191+ if inv_test_bit ( extended_features_ebx, 3 ) { // EAX=7 query, EBX bit 3 => bmi
194192 value = set_bit ( value, __Feature:: bmi as u32 ) ;
195193 }
196- if inv_test_bit ( ebx, 5 ) {
197- value = set_bit ( value, __Feature:: avx2 as u32 ) ;
198- }
199- if inv_test_bit ( ebx, 8 ) {
194+ if inv_test_bit ( extended_features_ebx, 8 ) { // EAX=7 query, EBX bit 8 => bmi2
200195 value = set_bit ( value, __Feature:: bmi2 as u32 ) ;
201196 }
202197
203- // CPUID call with EAX=1 => feature bits in ECX and EDX:
204- if inv_test_bit ( ecx, 0 ) {
198+ if inv_test_bit ( proc_info_ecx, 0 ) { // from here on: bits of the EAX=1 query (ECX first, then EDX)
205199 value = set_bit ( value, __Feature:: sse3 as u32 ) ;
206200 }
207- if inv_test_bit ( ecx , 5 ) {
201+ if inv_test_bit ( proc_info_ecx , 5 ) { // NOTE(review): Intel documents leaf-1 ECX bit 5 as VMX; ABM/LZCNT is reported in leaf 0x80000001 ECX bit 5 — confirm the intended leaf.
208202 value = set_bit ( value, __Feature:: abm as u32 ) ;
209203 }
210- if inv_test_bit ( ecx , 9 ) {
204+ if inv_test_bit ( proc_info_ecx , 9 ) {
211205 value = set_bit ( value, __Feature:: ssse3 as u32 ) ;
212206 }
213- if inv_test_bit ( ecx , 12 ) {
207+ if inv_test_bit ( proc_info_ecx , 12 ) {
214208 value = set_bit ( value, __Feature:: fma as u32 ) ;
215209 }
216- if inv_test_bit ( ecx , 19 ) {
210+ if inv_test_bit ( proc_info_ecx , 19 ) {
217211 value = set_bit ( value, __Feature:: sse4_1 as u32 ) ;
218212 }
219- if inv_test_bit ( ecx , 20 ) {
213+ if inv_test_bit ( proc_info_ecx , 20 ) {
220214 value = set_bit ( value, __Feature:: sse4_2 as u32 ) ;
221215 }
222- if inv_test_bit ( ecx , 21 ) {
216+ if inv_test_bit ( proc_info_ecx , 21 ) { // NOTE(review): Intel documents leaf-1 ECX bit 21 as x2APIC; TBM is reported in leaf 0x80000001 ECX bit 21 — confirm the intended leaf.
223217 value = set_bit ( value, __Feature:: tbm as u32 ) ;
224218 }
225- if inv_test_bit ( ecx , 23 ) {
219+ if inv_test_bit ( proc_info_ecx , 23 ) {
226220 value = set_bit ( value, __Feature:: popcnt as u32 ) ;
227221 }
228- if inv_test_bit ( ecx, 28 ) {
229- value = set_bit ( value, __Feature:: avx as u32 ) ;
230- }
231222
232- if inv_test_bit ( edx , 25 ) {
223+ if inv_test_bit ( proc_info_edx , 25 ) {
233224 value = set_bit ( value, __Feature:: sse as u32 ) ;
234225 }
235- if inv_test_bit ( edx , 26 ) {
226+ if inv_test_bit ( proc_info_edx , 26 ) {
236227 value = set_bit ( value, __Feature:: sse2 as u32 ) ;
237228 }
238229
230+ // ECX[26] detects XSAVE and ECX[27] detects OSXSAVE, that is, whether the
231+ // OS is AVX enabled and supports saving the state of the AVX/AVX2 vector
232+ // registers on context-switches, see:
233+ //
234+ // - https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
235+ // - https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190
236+ //
237+ //
238+ if inv_test_bit ( proc_info_ecx, 26 ) && inv_test_bit ( proc_info_ecx, 27 ) {
239+ unsafe fn xgetbv ( xcr_no : u32 ) -> u64 {
240+ let eax: u32 ;
241+ let edx: u32 ;
242+ // Executes `xgetbv` with ECX = `xcr_no`; the instruction's EAX and EDX outputs are captured below.
243+ asm ! ( "xgetbv"
244+ : "={eax}" ( eax) , "={edx}" ( edx)
245+ : "{ecx}" ( xcr_no)
246+ : : ) ;
247+ ( ( edx as u64 ) << 32 ) | ( eax as u64 ) // combine the halves: EDX is the high 32 bits, EAX the low 32 bits
248+ }
249+
250+ // `xgetbv` is only reached after both ECX[26]/ECX[27] checks above have passed.
    // NOTE(review): the instruction is not unconditionally available on x86;
    // per the Intel SDM it faults unless the OS has enabled XSAVE (the
    // condition OSXSAVE reports), so this guard is what the `unsafe` call
    // relies on — confirm.
251+ if unsafe { xgetbv ( 0 ) } & 6 == 6 { // 6 == 0b110: bits 1 and 2 of the `xgetbv(0)` result must both be set; presumably XCR0's SSE and AVX state-enable bits (see the Intel link above) — confirm.
252+ if inv_test_bit ( proc_info_ecx, 28 ) { // EAX=1 query, ECX bit 28 => avx
253+ value = set_bit ( value, __Feature:: avx as u32 ) ;
254+ }
255+ if inv_test_bit ( extended_features_ebx, 5 ) { // EAX=7 query, EBX bit 5 => avx2
256+ value = set_bit ( value, __Feature:: avx2 as u32 ) ;
257+ }
258+ }
259+ }
260+
239261 value
240262}
241263
0 commit comments