Skip to content

ggml-cpu: Build variant targeting Neoverse-V2 #14380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions ggml/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -317,9 +317,7 @@ if (GGML_CPU_ALL_VARIANTS)
endif()
elseif(GGML_SYSTEM_ARCH STREQUAL "ARM")
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
# Many of these features are optional so we build versions with popular
# combinations and name the backends based on the version they were
# first released with
# Generic ARM builds
ggml_add_cpu_backend_variant(armv8.0_1)
ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD)
ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC)
Expand All @@ -328,6 +326,9 @@ if (GGML_CPU_ALL_VARIANTS)
ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
# Builds optimized for specific cores
# neoverse-v2: AWS Graviton4, NVIDIA Grace
ggml_add_cpu_backend_variant(neoverse-v2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
# Android-specific backends with SoC-compatible feature sets
ggml_add_cpu_backend_variant(android_armv8.0_1)
Expand Down
97 changes: 51 additions & 46 deletions ggml/src/ggml-cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,53 +154,58 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
check_arm_feature(sme "#include <arm_sme.h>\n__arm_locally_streaming int main() { __asm__ volatile(\"smstart; smstop;\"); return 0; }")

list(APPEND ARCH_FLAGS "${ARM_MCPU_FLAG}${ARM_MCPU_FLAG_FIX}")
else()
if (GGML_CPU_ARM_ARCH)
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
elseif(GGML_CPU_ALL_VARIANTS)
# Begin with the lowest baseline
set(ARM_MCPU "armv8-a")
set(ARCH_TAGS "")
set(ARCH_DEFINITIONS "")

# When a feature is selected, bump the MCPU to the first
# version that supported it
if (GGML_INTERNAL_DOTPROD)
set(ARM_MCPU "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
endif()
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
set(ARM_MCPU "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
endif()
if (GGML_INTERNAL_SVE)
set(ARM_MCPU "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+sve")
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
endif()
if (GGML_INTERNAL_MATMUL_INT8)
set(ARM_MCPU "armv8.6-a")
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
endif()
if (GGML_INTERNAL_SVE2)
set(ARM_MCPU "armv8.6-a")
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
endif()
if (GGML_INTERNAL_NOSVE)
set(ARCH_TAGS "${ARCH_TAGS}+nosve")
endif()
if (GGML_INTERNAL_SME)
set(ARM_MCPU "armv9.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+sme")
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
endif()
list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}")
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
elseif (GGML_CPU_ARM_ARCH)
list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH})
elseif(GGML_CPU_ALL_VARIANTS)
# For the generic builds, begin with the lowest supported baseline
set(ARM_GENERIC_ARCH "armv8-a")
set(ARCH_TAGS "")
set(ARCH_DEFINITIONS "")

# When a feature is selected, bump GENERIC_ARCH to the earliest
# version which supported that feature
if (GGML_INTERNAL_DOTPROD)
set(ARM_GENERIC_ARCH "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+dotprod")
list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD)
endif()
if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC)
set(ARM_GENERIC_ARCH "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+fp16")
list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC)
endif()
if (GGML_INTERNAL_SVE)
set(ARM_GENERIC_ARCH "armv8.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+sve")
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE)
endif()
if (GGML_INTERNAL_MATMUL_INT8)
set(ARM_GENERIC_ARCH "armv8.6-a")
set(ARCH_TAGS "${ARCH_TAGS}+i8mm")
list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8)
endif()
if (GGML_INTERNAL_SVE2)
set(ARM_MCPU "armv8.6-a")
set(ARCH_TAGS "${ARCH_TAGS}+sve2")
list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2)
endif()
if (GGML_INTERNAL_NOSVE)
set(ARCH_TAGS "${ARCH_TAGS}+nosve")
endif()
if (GGML_INTERNAL_SME)
set(ARM_MCPU "armv9.2-a")
set(ARCH_TAGS "${ARCH_TAGS}+sme")
list(APPEND ARCH_DEFINITIONS GGML_USE_SME)
endif()

# CPU targeted builds first, else do the generic build
if (${tag_name} STREQUAL "neoverse-v2")
list(APPEND ARCH_FLAGS "-mcpu=neoverse-v2${ARCH_TAGS}")
list(APPEND ARCH_DEFINITIONS GGML_ARM_MCPU=NEOVERSE_V2)
else()
list(APPEND ARCH_FLAGS "-march=${ARM_GENERIC_ARCH}${ARCH_TAGS}")
endif()
ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS})
endif()

# show enabled features
Expand Down
36 changes: 30 additions & 6 deletions ggml/src/ggml-cpu/arch/arm/cpu-feats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

#if defined(__linux__)
#include <sys/auxv.h>

#include <fstream>
#include <string>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#endif
Expand All @@ -17,6 +20,7 @@
#endif

struct aarch64_features {
int cpu_part = -1;
// has_neon not needed, aarch64 has NEON guaranteed
bool has_dotprod = false;
bool has_fp16_va = false;
Expand All @@ -36,6 +40,17 @@ struct aarch64_features {
has_sve2 = !!(hwcap2 & HWCAP2_SVE2);
has_i8mm = !!(hwcap2 & HWCAP2_I8MM);
has_sme = !!(hwcap2 & HWCAP2_SME);

std::ifstream cpuinfo("/proc/cpuinfo");
std::string line;
while (std::getline(cpuinfo, line)) {
if (line.find("CPU part") == 0) {
// Parse the hex number after the colon
cpu_part = std::stoi(line.substr(line.find(':') + 1), nullptr, 16);
break;
}
}
cpuinfo.close();
#elif defined(__APPLE__)
int oldp = 0;
size_t size = sizeof(oldp);
Expand All @@ -61,29 +76,38 @@ static int ggml_backend_cpu_aarch64_score() {
int score = 1;
aarch64_features af;

// Bits 2-8 are used to rank backends by architecture or core when they
// otherwise have identical features.
#if defined(GGML_ARM_MCPU) && GGML_ARM_MCPU == NEOVERSE_V2
if (af.cpu_part == 0xd4f) {
score += 1<<5;
}
#endif

// Bits 9+: Features always trump architecture or core.
#ifdef GGML_USE_DOTPROD
if (!af.has_dotprod) { return 0; }
score += 1<<1;
score += 1<<8;
#endif
#ifdef GGML_USE_FP16_VECTOR_ARITHMETIC
if (!af.has_fp16_va) { return 0; }
score += 1<<2;
score += 1<<9;
#endif
#ifdef GGML_USE_SVE
if (!af.has_sve) { return 0; }
score += 1<<3;
score += 1<<10;
#endif
#ifdef GGML_USE_MATMUL_INT8
if (!af.has_i8mm) { return 0; }
score += 1<<4;
score += 1<<11;
#endif
#ifdef GGML_USE_SVE2
if (!af.has_sve2) { return 0; }
score += 1<<5;
score += 1<<12;
#endif
#ifdef GGML_USE_SME
if (!af.has_sme) { return 0; }
score += 1<<6;
score += 1<<13;
#endif

return score;
Expand Down
Loading