Skip to content

Add runtime detection for new AMX variants and MOVRS #1763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/docker/x86_64-unknown-linux-gnu-emulated/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
wget \
xz-utils

RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.48.0-2024-11-25-lin.tar.xz -O sde.tar.xz
RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.53.0-2025-03-16-lin.tar.xz -O sde.tar.xz
RUN mkdir intel-sde
RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \
Expand Down
36 changes: 23 additions & 13 deletions ci/docker/x86_64-unknown-linux-gnu-emulated/cpuid.def
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (C) 2017-2024 Intel Corporation.
# Copyright (C) 2024-2024 Intel Corporation.
#
# This software and the related documents are Intel copyrighted materials, and your
# use of them is governed by the express license under which they were provided to
Expand All @@ -9,27 +9,33 @@
# This software and the related documents are provided as is, with no express or
# implied warranties, other than those that are expressly stated in the License.
#
# The CPUID information in this file is for software enabling purposes only and
# it is not a full and accurate representation of the CPU under development which
# it represents.
# The CPUID information in this file is not a guarantee of the availability of
# features or characteristics in the final released CPU.
#
# CPUID_VERSION = 1.0
# Input => Output
# EAX ECX => EAX EBX ECX EDX
00000000 ******** => 00000024 68747541 444d4163 69746e65
00000001 ******** => 000806f0 00100800 7ffaf3ff bfebfbff
00000001 ******** => 000d06f0 00100800 7ffaf3ff bfebfbff
00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000
00000003 ******** => 00000000 00000000 00000000 00000000
00000004 00000000 => 7c004121 01c0003f 0000003f 00000000 #Deterministic Cache
00000004 00000000 => 7c004121 02c0003f 0000003f 00000000 #Deterministic Cache
00000004 00000001 => 7c004122 01c0003f 0000003f 00000000
00000004 00000002 => 7c004143 03c0003f 000003ff 00000000
00000004 00000003 => 7c0fc163 0280003f 0000dfff 00000004
00000004 00000002 => 7c004143 03c0003f 000007ff 00000000
00000004 00000003 => 7c0fc163 04c0003f 0005ffff 00000004
00000004 00000004 => 00000000 00000000 00000000 00000000
00000005 ******** => 00000040 00000040 00000003 00042120 #MONITOR/MWAIT
00000006 ******** => 00000077 00000002 00000001 00000000 #Thermal and Power
00000007 00000000 => 00000001 f3bfbfbf bac05ffe 03d54130 #Extended Features
00000007 00000001 => 18ee00bf 00000002 00000000 1d29cd3e
00000007 00000000 => 00000001 f3bfbfbf bbc05ffe 03d55130 #Extended Features
00000007 00000001 => 88ee00bf 00000002 00000000 1d29cd3e
00000008 ******** => 00000000 00000000 00000000 00000000
00000009 ******** => 00000000 00000000 00000000 00000000 #Direct Cache
0000000a ******** => 07300403 00000000 00000000 00000603
0000000b 00000000 => 00000001 00000002 00000100 00000000 #Extended Topology
0000000b 00000001 => 00000004 00000002 00000201 00000000
0000000b 00000000 => 00000001 00000002 00000100 0000001e #Extended Topology
0000000b 00000001 => 00000004 00000002 00000201 0000001e
0000000c ******** => 00000000 00000000 00000000 00000000
0000000d 00000000 => 000e02e7 00002b00 00002b00 00000000 #xcr0
0000000d 00000001 => 0000001f 00000240 00000100 00000000
Expand All @@ -44,7 +50,11 @@
00000019 ******** => 00000000 00000005 00000000 00000000 #Key Locker
0000001d 00000000 => 00000001 00000000 00000000 00000000 #AMX Tile
0000001d 00000001 => 04002000 00080040 00000010 00000000 #AMX Palette1
0000001e 00000000 => 00000000 00004010 00000000 00000000 #AMX Tmul
0000001e 00000000 => 00000001 00004010 00000000 00000000 #AMX Tmul
0000001e 00000001 => 000001ff 00000000 00000000 00000000
0000001f 00000000 => 00000001 00000002 00000100 0000001e
0000001f 00000001 => 00000007 00000070 00000201 0000001e
0000001f 00000002 => 00000000 00000000 00000002 0000001e
00000024 00000000 => 00000000 00070002 00000000 00000000 #AVX10
80000000 ******** => 80000008 00000000 00000000 00000000
80000001 ******** => 00000000 00000000 00200961 2c100000
Expand All @@ -56,6 +66,6 @@
80000007 ******** => 00000000 00000000 00000000 00000100
80000008 ******** => 00003028 00000200 00000200 00000000

# This file was copied from intel-sde/misc/cpuid/future/cpuid.def, and modified to
# use "AuthenticAMD" as the vendor and the support for `XOP`, `SSE4a`, `TBM` and
# `AVX512_VP2INTERSECT` was added in the CPUID.
# This file was copied from intel-sde/misc/cpuid/dmr/cpuid.def and modified to
# use "AuthenticAMD" as the vendor; support for `XOP`, `SSE4a`, `TBM`,
# `AVX512_VP2INTERSECT` and the VEX variants of AVX512 was also added to the CPUID.
22 changes: 20 additions & 2 deletions crates/std_detect/src/detect/arch/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ features! {
/// * `"amx-bf16"`
/// * `"amx-fp16"`
/// * `"amx-complex"`
/// * `"amx-avx512"`
/// * `"amx-fp8"`
/// * `"amx-movrs"`
/// * `"amx-tf32"`
/// * `"amx-transpose"`
/// * `"f16c"`
/// * `"fma"`
/// * `"bmi1"`
Expand All @@ -109,6 +114,8 @@ features! {
/// * `"rtm"`
/// * `"movbe"`
/// * `"ermsb"`
/// * `"movrs"`
/// * `"xop"`
///
/// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
#[stable(feature = "simd_x86", since = "1.27.0")]
Expand Down Expand Up @@ -177,8 +184,7 @@ features! {
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi";
/// AVX-512 VBMI (Vector Byte Manipulation Instructions)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq";
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and
/// Quadword)
/// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and Quadword)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2";
/// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] gfni: "gfni";
Expand Down Expand Up @@ -217,6 +223,16 @@ features! {
/// AMX-FP16 (Float16 Operations)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_complex: "amx-complex";
/// AMX-COMPLEX (Complex number Operations)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_avx512: "amx-avx512";
/// AMX-AVX512 (AVX512 operations extended to matrices)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_fp8: "amx-fp8";
/// AMX-FP8 (Float8 Operations)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_movrs: "amx-movrs";
/// AMX-MOVRS (Matrix MOVRS operations)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_tf32: "amx-tf32";
/// AMX-TF32 (TensorFloat32 Operations)
@FEATURE: #[unstable(feature = "x86_amx_intrinsics", issue = "126622")] amx_transpose: "amx-transpose";
/// AMX-TRANSPOSE (Matrix Transpose Operations)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c";
/// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma";
Expand Down Expand Up @@ -253,6 +269,8 @@ features! {
/// RTM, Intel (Restricted Transactional Memory)
@FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe";
/// MOVBE (Move Data After Swapping Bytes)
@FEATURE: #[unstable(feature = "movrs_target_feature", issue = "137976")] movrs: "movrs";
/// MOVRS (Move data with the read-shared hint)
@FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb";
/// ERMSB, Enhanced REP MOVSB and STOSB
@FEATURE: #[unstable(feature = "xop_target_feature", issue = "127208")] xop: "xop";
Expand Down
29 changes: 22 additions & 7 deletions crates/std_detect/src/detect/os/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ pub(crate) fn detect_features() -> cache::Initializer {

enable(extended_features_ebx, 9, Feature::ermsb);

enable(extended_features_eax_leaf_1, 31, Feature::movrs);

// Detect if CPUID.19h available
if bit::test(extended_features_ecx as usize, 23) {
let CpuidResult { ebx, .. } = unsafe { __cpuid(0x19) };
Expand Down Expand Up @@ -250,14 +252,27 @@ pub(crate) fn detect_features() -> cache::Initializer {
enable(extended_features_edx, 8, Feature::avx512vp2intersect);
enable(extended_features_edx, 23, Feature::avx512fp16);
enable(extended_features_eax_leaf_1, 5, Feature::avx512bf16);
}
}

if os_amx_support {
enable(extended_features_edx, 24, Feature::amx_tile);
enable(extended_features_edx, 25, Feature::amx_int8);
enable(extended_features_edx, 22, Feature::amx_bf16);
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);

if max_basic_leaf >= 0x1e {
let CpuidResult {
eax: amx_feature_flags_eax,
..
} = unsafe { __cpuid_count(0x1e_u32, 1) };

if os_amx_support {
enable(extended_features_edx, 24, Feature::amx_tile);
enable(extended_features_edx, 25, Feature::amx_int8);
enable(extended_features_edx, 22, Feature::amx_bf16);
enable(extended_features_eax_leaf_1, 21, Feature::amx_fp16);
enable(extended_features_edx_leaf_1, 8, Feature::amx_complex);
}
enable(amx_feature_flags_eax, 4, Feature::amx_fp8);
enable(amx_feature_flags_eax, 5, Feature::amx_transpose);
enable(amx_feature_flags_eax, 6, Feature::amx_tf32);
enable(amx_feature_flags_eax, 7, Feature::amx_avx512);
enable(amx_feature_flags_eax, 8, Feature::amx_movrs);
}
}
}
Expand Down
12 changes: 11 additions & 1 deletion crates/std_detect/tests/x86-specific.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
sha512_sm_x86,
x86_amx_intrinsics,
xop_target_feature,
keylocker_x86
keylocker_x86,
movrs_target_feature
)]

extern crate cupid;
Expand Down Expand Up @@ -97,6 +98,15 @@ fn dump() {
println!("xop: {:?}", is_x86_feature_detected!("xop"));
println!("kl: {:?}", is_x86_feature_detected!("kl"));
println!("widekl: {:?}", is_x86_feature_detected!("widekl"));
println!("movrs: {:?}", is_x86_feature_detected!("movrs"));
println!("amx-fp8: {:?}", is_x86_feature_detected!("amx-fp8"));
println!(
"amx-transpose: {:?}",
is_x86_feature_detected!("amx-transpose")
);
println!("amx-tf32: {:?}", is_x86_feature_detected!("amx-tf32"));
println!("amx-avx512: {:?}", is_x86_feature_detected!("amx-avx512"));
println!("amx-movrs: {:?}", is_x86_feature_detected!("amx-movrs"));
}

#[cfg(feature = "std_detect_env_override")]
Expand Down